From 977d96ed0bd7d813b6f5498e9423d70a02e190ac Mon Sep 17 00:00:00 2001 From: avantonder Date: Fri, 29 Oct 2021 13:33:38 +0100 Subject: [PATCH 01/15] Add Racon module to nf-core/modules (#949) * add racon * add racon * add racon * add racon module * add racon module * edit racon module * edit racon module * edit racon module * edit racon module Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> Co-authored-by: Chris Cheshire --- modules/racon/functions.nf | 78 +++++++++++++++++++++++++++ modules/racon/main.nf | 45 ++++++++++++++++ modules/racon/meta.yml | 52 ++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/config/test_data.config | 5 +- tests/modules/minimap2/align/test.yml | 2 +- tests/modules/racon/main.nf | 15 ++++++ tests/modules/racon/test.yml | 7 +++ 8 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 modules/racon/functions.nf create mode 100644 modules/racon/main.nf create mode 100644 modules/racon/meta.yml create mode 100644 tests/modules/racon/main.nf create mode 100644 tests/modules/racon/test.yml diff --git a/modules/racon/functions.nf b/modules/racon/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/racon/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: 
[] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/racon/main.nf b/modules/racon/main.nf new file mode 100644 index 00000000..60a5061e --- /dev/null +++ b/modules/racon/main.nf @@ -0,0 +1,45 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process RACON { + tag "$meta.id" + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::racon=1.4.20" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1" + } else { + container "quay.io/biocontainers/racon:1.4.20--h9a82719_1" + } + + input: + tuple val(meta), path(reads), path(assembly), path(paf) + + output: + tuple val(meta), path('*_assembly_consensus.fasta.gz') , emit: improved_assembly + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" + """ + racon -t "${task.cpus}" \\ + "${reads}" \\ + "${paf}" \\ + $options.args \\ + "${assembly}" > \\ + ${prefix}_assembly_consensus.fasta + + gzip -n ${prefix}_assembly_consensus.fasta + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( racon --version 2>&1 | sed 's/^.*v//' ) + END_VERSIONS + """ +} diff --git a/modules/racon/meta.yml b/modules/racon/meta.yml new file mode 100644 index 00000000..2428f044 --- /dev/null +++ b/modules/racon/meta.yml @@ -0,0 +1,52 @@ +name: racon +description: Consensus module for raw de novo DNA assembly of long uncorrected reads +keywords: + - assembly + - pacbio + - nanopore + - polish +tools: + - racon: + description: Ultrafast consensus module for raw de novo genome assembly of long uncorrected reads. + homepage: https://github.com/lbcb-sci/racon + documentation: https://github.com/lbcb-sci/racon + tool_dev_url: https://github.com/lbcb-sci/racon + doi: https://doi.org/10.1101/gr.214270.116 + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input FastQ files. Racon expects single end reads + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" + - assembly: + type: file + description: Genome assembly to be improved + pattern: "*.{fasta,fa}" + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - improved_assembly: + type: file + description: Improved genome assembly + pattern: "*_assembly_consensus.fasta.gz" + +authors: + - "@avantonder" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 4fdd8303..155ed78d 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -923,6 +923,10 @@ quast: - modules/quast/** - tests/modules/quast/** +racon: + - modules/racon/** + - tests/modules/racon/** + rapidnj: - modules/rapidnj/** - tests/modules/rapidnj/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index e8729b9b..c05e1c8f 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -11,6 +11,7 @@ params { genome_gff3 = "${test_data_dir}/genomics/sarscov2/genome/genome.gff3" genome_gff3_gz = "${test_data_dir}/genomics/sarscov2/genome/genome.gff3.gz" genome_gtf = "${test_data_dir}/genomics/sarscov2/genome/genome.gtf" + genome_paf = "${test_data_dir}/genomics/sarscov2/genome/genome.paf" genome_sizes = "${test_data_dir}/genomics/sarscov2/genome/genome.sizes" transcriptome_fasta = "${test_data_dir}/genomics/sarscov2/genome/transcriptome.fasta" transcriptome_paf = "${test_data_dir}/genomics/sarscov2/genome/transcriptome.paf" @@ -243,6 +244,7 @@ params { 'bacteroides_fragilis'{ 'genome' { genome_fna_gz = "${test_data_dir}/genomics/bacteroides_fragilis/genome/genome.fna.gz" + genome_paf = "${test_data_dir}/genomics/bacteroides_fragilis/genome/genome.paf" } 'illumina' { test1_contigs_fa_gz = "${test_data_dir}/genomics/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz" @@ -253,7 +255,8 @@ params { } 'nanopore' { test_fastq_gz = "${test_data_dir}/genomics/bacteroides_fragilis/nanopore/fastq/test.fastq.gz" + overlap_paf = "${test_data_dir}/genomics/bacteroides_fragilis/nanopore/overlap.paf" } - } + } } } diff --git 
a/tests/modules/minimap2/align/test.yml b/tests/modules/minimap2/align/test.yml index f9b762bb..3309bf4b 100644 --- a/tests/modules/minimap2/align/test.yml +++ b/tests/modules/minimap2/align/test.yml @@ -14,4 +14,4 @@ - minimap2/align files: - path: ./output/minimap2/test.paf - md5sum: 5e7b55a26bf0ea3a2843423d3e0b9a28 + md5sum: 5e7b55a26bf0ea3a2843423d3e0b9a28 \ No newline at end of file diff --git a/tests/modules/racon/main.nf b/tests/modules/racon/main.nf new file mode 100644 index 00000000..b6b864e1 --- /dev/null +++ b/tests/modules/racon/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { RACON } from '../../../modules/racon/main.nf' addParams( options: [:] ) + +workflow test_racon { + input = [ [ id:'test', single_end:true ], // meta map + file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true), + file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true), + file(params.test_data['bacteroides_fragilis']['genome']['genome_paf'], checkIfExists: true) + ] + + RACON ( input ) +} \ No newline at end of file diff --git a/tests/modules/racon/test.yml b/tests/modules/racon/test.yml new file mode 100644 index 00000000..dc8e57dc --- /dev/null +++ b/tests/modules/racon/test.yml @@ -0,0 +1,7 @@ +- name: racon test_racon + command: nextflow run tests/modules/racon -entry test_racon -c tests/config/nextflow.config + tags: + - racon + files: + - path: output/racon/test_assembly_consensus.fasta.gz + md5sum: 96a0ba94c6154f6f37b5a76a0207eb6f From a4943a9e572ac2a5721f598b7bca059633408481 Mon Sep 17 00:00:00 2001 From: louperelo <44900284+louperelo@users.noreply.github.com> Date: Fri, 29 Oct 2021 15:02:02 +0200 Subject: [PATCH 02/15] Mtnucratio (#986) * add new module samtools_depth * fixed main.nf for samtools/depth * add new module mtnucratio * fix main.nf * Apply suggestions from code review Co-authored-by: James A. 
Fellows Yates * Apply suggestions from code review Co-authored-by: James A. Fellows Yates * layout in main.nf Co-authored-by: James A. Fellows Yates --- modules/mtnucratio/functions.nf | 78 +++++++++++++++++++++++++++++++ modules/mtnucratio/main.nf | 43 +++++++++++++++++ modules/mtnucratio/meta.yml | 54 +++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/mtnucratio/main.nf | 14 ++++++ tests/modules/mtnucratio/test.yml | 9 ++++ 6 files changed, 202 insertions(+) create mode 100644 modules/mtnucratio/functions.nf create mode 100644 modules/mtnucratio/main.nf create mode 100644 modules/mtnucratio/meta.yml create mode 100644 tests/modules/mtnucratio/main.nf create mode 100644 tests/modules/mtnucratio/test.yml diff --git a/modules/mtnucratio/functions.nf b/modules/mtnucratio/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/mtnucratio/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() 
} // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/mtnucratio/main.nf b/modules/mtnucratio/main.nf new file mode 100644 index 00000000..28d08a13 --- /dev/null +++ b/modules/mtnucratio/main.nf @@ -0,0 +1,43 @@ +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process MTNUCRATIO { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::mtnucratio=0.7" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/mtnucratio:0.7--hdfd78af_2" + } else { + container "quay.io/biocontainers/mtnucratio:0.7--hdfd78af_2" + } + + input: + tuple val(meta), path(bam) + val(mt_id) + + output: + tuple val(meta), path("*.mtnucratio"), emit: mtnucratio + tuple val(meta), path("*.json") , emit: json + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + + """ + mtnucratio \\ + $options.args \\ + $bam \\ + $mt_id + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(mtnucratio --version 2>&1) | head -n1 | sed 's/Version: //') + END_VERSIONS + """ +} diff --git a/modules/mtnucratio/meta.yml b/modules/mtnucratio/meta.yml new file mode 100644 index 00000000..824af397 --- /dev/null +++ b/modules/mtnucratio/meta.yml @@ -0,0 +1,54 @@ +name: mtnucratio +description: A small Java tool to calculate ratios between MT and nuclear sequencing reads in a given BAM file. +keywords: + - mtnucratio + - ratio + - reads + - bam + - mitochondrial to nuclear ratio + - mitochondria + - statistics +tools: + - mtnucratio: + description: A small tool to determine MT to Nuclear ratios for NGS data. + homepage: https://github.com/apeltzer/MTNucRatioCalculator + documentation: https://github.com/apeltzer/MTNucRatioCalculator + tool_dev_url: https://github.com/apeltzer/MTNucRatioCalculator + doi: "10.1186/s13059-016-0918-z" + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: (coordinate) sorted BAM/SAM file + pattern: "*.{bam,sam}" + - mt_id: + type: string + description: Identifier of the contig/chromosome of interest (e.g. 
chromosome, contig) as named in the reference FASTA file the reads were aligned against, e.g. mt or chrMT for mitochondria + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mtnucratio: + type: file + description: Text file containing metrics (mtreads, mt_cov_avg, nucreads, nuc_cov_avg, mt_nuc_ratio) + pattern: "*.mtnucratio" + - json: + type: file + description: JSON file, containing metadata map with sample name, tool name and version, and metrics as in txt file + pattern: "*.json" + +authors: + - "@louperelo" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 155ed78d..c0936a81 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -778,6 +778,10 @@ msisensor/scan: - modules/msisensor/scan/** - tests/modules/msisensor/scan/** +mtnucratio: + - modules/mtnucratio/** + - tests/modules/mtnucratio/** + multiqc: - modules/fastqc/** - modules/multiqc/** diff --git a/tests/modules/mtnucratio/main.nf b/tests/modules/mtnucratio/main.nf new file mode 100644 index 00000000..dd9fc9db --- /dev/null +++ b/tests/modules/mtnucratio/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { MTNUCRATIO } from '../../../modules/mtnucratio/main.nf' addParams( options: [:] ) + +workflow test_mtnucratio { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)] + mt_id = 'mt_id' + + MTNUCRATIO ( input, mt_id ) +} diff --git a/tests/modules/mtnucratio/test.yml b/tests/modules/mtnucratio/test.yml new file mode 100644 index 00000000..76cbaf32 --- /dev/null +++ b/tests/modules/mtnucratio/test.yml @@ -0,0 +1,9 @@ +- name: mtnucratio + command: nextflow run tests/modules/mtnucratio -entry test_mtnucratio -c 
tests/config/nextflow.config + tags: + - mtnucratio + files: + - path: output/mtnucratio/test.single_end.sorted.bam.mtnucratio + md5sum: 19e96849802c70aa0694785f716274b7 + - path: output/mtnucratio/test.single_end.sorted.bam.mtnucratiomtnuc.json + md5sum: 14d24be6272854d6762f0dfad5918ef6 From 0a7368aa6f89a63229f5e04fa937851348e16ccf Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Fri, 29 Oct 2021 16:37:39 +0200 Subject: [PATCH 03/15] Refactor genrich to avoid params in main (#981) * Refactor genrich to avoid params in main * Missed to commit test.yml file --- modules/genrich/main.nf | 19 +++++++----- modules/genrich/meta.yml | 15 ++++++++-- tests/modules/genrich/main.nf | 53 ++++++++++++++++++++++++++++------ tests/modules/genrich/test.yml | 20 +++++++++---- 4 files changed, 81 insertions(+), 26 deletions(-) diff --git a/modules/genrich/main.nf b/modules/genrich/main.nf index c947e9cf..f34f9cd2 100644 --- a/modules/genrich/main.nf +++ b/modules/genrich/main.nf @@ -22,6 +22,10 @@ process GENRICH { tuple val(meta), path(treatment_bam) path control_bam path blacklist_bed + val save_pvalues + val save_pileup + val save_bed + val save_duplicates output: tuple val(meta), path("*narrowPeak") , emit: peaks @@ -32,14 +36,14 @@ process GENRICH { path "versions.yml" , emit: versions script: - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def control = params.control_bam ? "-c $control_bam" : '' - def pvalues = params.pvalues ? "-f ${prefix}.pvalues.bedGraph" : "" - def pileup = params.pileup ? "-k ${prefix}.pileup.bedGraph" : "" - def bed = params.bed ? "-b ${prefix}.intervals.bed" : "" - def blacklist = params.blacklist_bed ? "-E $blacklist_bed" : "" + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def control = control_bam ? "-c $control_bam" : '' + def blacklist = blacklist_bed ? "-E $blacklist_bed" : "" + def pvalues = save_pvalues ? "-f ${prefix}.pvalues.bedGraph" : "" + def pileup = save_pileup ? 
"-k ${prefix}.pileup.bedGraph" : "" + def bed = save_bed ? "-b ${prefix}.intervals.bed" : "" def duplicates = "" - if (params.save_duplicates) { + if (save_duplicates) { if (options.args.contains('-r')) { duplicates = "-R ${prefix}.duplicates.txt" } else { @@ -58,7 +62,6 @@ process GENRICH { $pileup \\ $bed \\ $duplicates \\ - $blacklist \\ $control cat <<-END_VERSIONS > versions.yml diff --git a/modules/genrich/meta.yml b/modules/genrich/meta.yml index 8f7b004b..37184190 100644 --- a/modules/genrich/meta.yml +++ b/modules/genrich/meta.yml @@ -15,7 +15,6 @@ tools: tool_dev_url: https://github.com/jsh58/Genrich doi: "" licence: ['MIT'] - input: - meta: type: map @@ -34,7 +33,18 @@ input: type: file description: Bed file containing genomic intervals to exclude from the analysis pattern: "*.{bed}" - + - save_pvalues: + type: boolean + description: Create bedgraph-ish file for p/q-values file + - save_pileup: + type: boolean + description: Create bedgraph-ish file for pileups and p-values + - save_bed: + type: boolean + description: Create BED file for reads/fragments/intervals + - save_duplicates: + type: boolean + description: Create PCR duplicates file (only works if -r option is set) output: - meta: type: map @@ -65,7 +75,6 @@ output: type: file description: File containing software version pattern: "*.{version.txt}" - authors: - "@JoseEspinosa" diff --git a/tests/modules/genrich/main.nf b/tests/modules/genrich/main.nf index 654b38e5..aa1a2d49 100644 --- a/tests/modules/genrich/main.nf +++ b/tests/modules/genrich/main.nf @@ -2,10 +2,10 @@ nextflow.enable.dsl = 2 -include { GENRICH } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:false, save_duplicates:false, options: ["args": "-p 0.1"] ) -include { GENRICH as GENRICH_BLACKLIST } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:true, save_duplicates:false, 
options: ["args": "-p 0.1"] ) -include { GENRICH as GENRICH_ALL_OUTPUTS } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: true, pileup:true, bed:true, blacklist_bed:false, save_duplicates:true, options: ["args": "-r -p 0.1"] ) -include { GENRICH as GENRICH_ATACSEQ } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:false, save_duplicates:false, options: ["args": "-j -p 0.1"] ) +include { GENRICH } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-p 0.1"] ) +include { GENRICH as GENRICH_CTRL } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-p 0.9"] ) +include { GENRICH as GENRICH_ALL } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-r -p 0.1"] ) +include { GENRICH as GENRICH_ATACSEQ } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-j -p 0.1"] ) workflow test_genrich { input = [ [ id:'test', single_end:false ], // meta map @@ -13,7 +13,12 @@ workflow test_genrich { control = [ ] blacklist = [ ] - GENRICH ( input, control, blacklist ) + save_pvalues = false + save_pileup = false + save_bed = false + save_duplicates = false + + GENRICH ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates ) } workflow test_genrich_ctrl { @@ -22,7 +27,12 @@ workflow test_genrich_ctrl { control = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] blacklist = [ ] - GENRICH ( input, control, blacklist ) + save_pvalues = false + save_pileup = false + save_bed = false + save_duplicates = false + + GENRICH_CTRL ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates ) } workflow test_genrich_all_outputs { @@ -31,14 +41,39 @@ workflow test_genrich_all_outputs { control = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] 
blacklist = [ ] - GENRICH_ALL_OUTPUTS ( input, control, blacklist ) + save_pvalues = true + save_pileup = true + save_bed = true + save_duplicates = true + + GENRICH_ALL ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates ) +} + +workflow test_genrich_blacklist { + input = [ [ id:'test', single_end:false ], // meta map + [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]] + control = [ ] + blacklist = [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)] + + save_pvalues = false + save_pileup = false + save_bed = false + save_duplicates = false + + GENRICH ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates ) } workflow test_genrich_atacseq { input = [ [ id:'test', single_end:false ], // meta map [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]] - control = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] + control = [ ] blacklist = [ ] - GENRICH_ATACSEQ ( input, control, blacklist ) + save_pvalues = false + save_pileup = false + save_bed = false + save_duplicates = false + + GENRICH_ATACSEQ ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates ) } + diff --git a/tests/modules/genrich/test.yml b/tests/modules/genrich/test.yml index bd762f7c..63bf2927 100644 --- a/tests/modules/genrich/test.yml +++ b/tests/modules/genrich/test.yml @@ -12,7 +12,7 @@ - genrich files: - path: output/genrich/test.narrowPeak - md5sum: 6afabdd3f691c7c84c66ff8a23984681 + md5sum: 2fcc392360b317f5ebee88cdbc149e05 - name: genrich test_genrich_all_outputs command: nextflow run tests/modules/genrich -entry test_genrich_all_outputs -c tests/config/nextflow.config @@ -20,15 +20,23 @@ - genrich files: - path: output/genrich/test.duplicates.txt - md5sum: a92893f905fd8b3751bc6a960fbfe7ba + md5sum: 
159d557af7c23bc3cfb802d87fa96c34 - path: output/genrich/test.intervals.bed - md5sum: 52edf47e6641c0cc03f9cca7324f7eaa + md5sum: 4bea65caa3f4043d703af4b57161112e - path: output/genrich/test.narrowPeak - md5sum: e45eb7d000387975050c2e85c164e5be + md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/genrich/test.pileup.bedGraph - md5sum: e4f7fa664cd4ed2cf3a1a3a9eb415e71 + md5sum: 03e53848de695b5794f32f15b2709203 - path: output/genrich/test.pvalues.bedGraph - md5sum: 564859953704983393d4b7d6317060cd + md5sum: b14feef34b6d2379a173a734ca963cde + +- name: genrich test_genrich_blacklist + command: nextflow run tests/modules/genrich -entry test_genrich_blacklist -c tests/config/nextflow.config + tags: + - genrich + files: + - path: output/genrich/test.narrowPeak + md5sum: 6afabdd3f691c7c84c66ff8a23984681 - name: genrich test_genrich_atacseq command: nextflow run tests/modules/genrich -entry test_genrich_atacseq -c tests/config/nextflow.config From 3df4fe60851c19ec8d55ef3a978ae851eaccfd68 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Fri, 29 Oct 2021 17:05:06 +0200 Subject: [PATCH 04/15] add freebayes/somatic and update freebayes/germline (#990) * add freebayes/somatic and update freebayes/germline to new syntax and cram * add pytest * update with new freebayes/germline path --- modules/freebayes/{ => germline}/functions.nf | 0 modules/freebayes/{ => germline}/main.nf | 31 ++++---- modules/freebayes/{ => germline}/meta.yml | 10 +-- modules/freebayes/somatic/functions.nf | 78 +++++++++++++++++++ modules/freebayes/somatic/main.nf | 74 ++++++++++++++++++ modules/freebayes/somatic/meta.yml | 66 ++++++++++++++++ modules/gatk4/applybqsr/meta.yml | 1 + tests/config/pytest_modules.yml | 10 ++- tests/modules/freebayes/germline/main.nf | 51 ++++++++++++ tests/modules/freebayes/germline/test.yml | 26 +++++++ tests/modules/freebayes/main.nf | 35 --------- tests/modules/freebayes/somatic/main.nf | 37 +++++++++ tests/modules/freebayes/somatic/test.yml | 17 ++++ 
tests/modules/freebayes/test.yml | 14 ---- 14 files changed, 378 insertions(+), 72 deletions(-) rename modules/freebayes/{ => germline}/functions.nf (100%) rename modules/freebayes/{ => germline}/main.nf (75%) rename modules/freebayes/{ => germline}/meta.yml (95%) create mode 100644 modules/freebayes/somatic/functions.nf create mode 100644 modules/freebayes/somatic/main.nf create mode 100644 modules/freebayes/somatic/meta.yml create mode 100644 tests/modules/freebayes/germline/main.nf create mode 100644 tests/modules/freebayes/germline/test.yml delete mode 100644 tests/modules/freebayes/main.nf create mode 100644 tests/modules/freebayes/somatic/main.nf create mode 100644 tests/modules/freebayes/somatic/test.yml delete mode 100644 tests/modules/freebayes/test.yml diff --git a/modules/freebayes/functions.nf b/modules/freebayes/germline/functions.nf similarity index 100% rename from modules/freebayes/functions.nf rename to modules/freebayes/germline/functions.nf diff --git a/modules/freebayes/main.nf b/modules/freebayes/germline/main.nf similarity index 75% rename from modules/freebayes/main.nf rename to modules/freebayes/germline/main.nf index 63235d8a..eae62036 100644 --- a/modules/freebayes/main.nf +++ b/modules/freebayes/germline/main.nf @@ -4,7 +4,7 @@ include { initOptions; saveFiles; getProcessName; getSoftwareName } from './func params.options = [:] options = initOptions(params.options) -process FREEBAYES { +process FREEBAYES_GERMLINE { tag "$meta.id" label 'process_low' publishDir "${params.outdir}", @@ -19,24 +19,25 @@ process FREEBAYES { } input: - tuple val(meta), path(bam), path(bai) - tuple path(fasta), path(fai) - path(targets) - path(samples) - path(populations) - path(cnv) - + tuple val(meta), path(input), path(input_index) + path fasta + path fai + path targets + path samples + path populations + path cnv output: tuple val(meta), path("*.vcf.gz") , emit: vcf path "versions.yml" , emit: versions script: - def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" - def targets_file = targets ? "--target ${targets}" : "" - def samples_file = samples ? "--samples ${samples}" : "" - def populations_file = populations ? "--populations ${populations}" : "" - def cnv_file = cnv ? "--cnv-map ${cnv}" : "" + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def targets_file = targets ? "--target ${targets}" : "" + def samples_file = samples ? "--samples ${samples}" : "" + def populations_file = populations ? "--populations ${populations}" : "" + def cnv_file = cnv ? "--cnv-map ${cnv}" : "" + if (task.cpus > 1) { """ freebayes-parallel \\ @@ -47,7 +48,7 @@ process FREEBAYES { $populations_file \\ $cnv_file \\ $options.args \\ - $bam > ${prefix}.vcf + $input > ${prefix}.vcf gzip --no-name ${prefix}.vcf @@ -66,7 +67,7 @@ process FREEBAYES { $populations_file \\ $cnv_file \\ $options.args \\ - $bam > ${prefix}.vcf + $input > ${prefix}.vcf gzip --no-name ${prefix}.vcf diff --git a/modules/freebayes/meta.yml b/modules/freebayes/germline/meta.yml similarity index 95% rename from modules/freebayes/meta.yml rename to modules/freebayes/germline/meta.yml index 46eb5309..86650715 100644 --- a/modules/freebayes/meta.yml +++ b/modules/freebayes/germline/meta.yml @@ -1,4 +1,4 @@ -name: freebayes +name: freebayes_germline description: A haplotype-based variant detector keywords: - variant caller @@ -12,7 +12,7 @@ tools: homepage: https://github.com/freebayes/freebayes documentation: https://github.com/freebayes/freebayes tool_dev_url: https://github.com/freebayes/freebayes - doi: "" + doi: "arXiv:1207.3907" licence: ['MIT'] input: @@ -21,11 +21,11 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - bai: + - input_index: type: file description: BAM/CRAM/SAM index file pattern: "*.bam.bai" @@ -59,7 +59,6 @@ input: seq_name start end sample_name copy_number pattern: "*.bed" - output: - meta: type: map @@ -76,3 +75,4 @@ output: pattern: "*.vcf.gz" authors: - "@maxibor" + - "@FriederikeHanssen" diff --git a/modules/freebayes/somatic/functions.nf b/modules/freebayes/somatic/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/freebayes/somatic/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def 
path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/freebayes/somatic/main.nf b/modules/freebayes/somatic/main.nf new file mode 100644 index 00000000..c1579661 --- /dev/null +++ b/modules/freebayes/somatic/main.nf @@ -0,0 +1,74 @@ +// Import generic module functions +include { initOptions; saveFiles; getProcessName; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process FREEBAYES_SOMATIC { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::freebayes=1.3.5" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3" + } else { + container "quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3" + } + + input: + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor) + path fasta + path fai + path targets + path samples + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def targets_file = targets ? "--target ${targets}" : "" + def samples_file = samples ? "--samples ${samples}" : "" + + if (task.cpus > 1) { + """ + freebayes-parallel \\ + <(fasta_generate_regions.py ${fasta}.fai 10000) ${task.cpus} \\ + -f $fasta \\ + $targets_file \\ + $samples_file \\ + $options.args \\ + $input_tumor \\ + $input_normal > ${prefix}.vcf + + gzip --no-name ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) + END_VERSIONS + """ + + } else { + """ + freebayes \\ + -f $fasta \\ + $targets_file \\ + $samples_file \\ + $options.args \\ + $input_tumor \\ + $input_normal > ${prefix}.vcf + + gzip --no-name ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) + END_VERSIONS + """ + } +} diff --git a/modules/freebayes/somatic/meta.yml b/modules/freebayes/somatic/meta.yml new file mode 100644 index 00000000..391e5007 --- /dev/null +++ b/modules/freebayes/somatic/meta.yml @@ -0,0 +1,66 @@ +name: freebayes_somatic +description: A haplotype-based variant detector +keywords: + - variant caller + - SNP + - genotyping + - somatic variant 
calling + - bayesian + +tools: + - freebayes: + description: Bayesian haplotype-based polymorphism discovery and genotyping + homepage: https://github.com/freebayes/freebayes + documentation: https://github.com/freebayes/freebayes + tool_dev_url: https://github.com/freebayes/freebayes + doi: "arXiv:1207.3907" + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - input_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.bam.bai" + - fasta: + type: file + description: reference fasta file + pattern: ".{fa,fa.gz,fasta,fasta.gz}" + - fai: + type: file + description: reference fasta file index + pattern: "*.fai" + - targets: + type: file + description: Optional - Limit analysis to targets listed in this BED-format FILE. + pattern: "*.bed" + - samples: + type: file + description: Optional - Limit analysis to samples listed (one per line) in the FILE. + pattern: "*.txt" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + +authors: + - "@FriederikeHanssen" diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml index b002dca6..e7419860 100644 --- a/modules/gatk4/applybqsr/meta.yml +++ b/modules/gatk4/applybqsr/meta.yml @@ -61,3 +61,4 @@ output: authors: - "@yocra3" + - "@FriederikeHanssen" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index c0936a81..d7fed0c0 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -410,9 +410,13 @@ flash: - modules/flash/** - tests/modules/flash/** -freebayes: - - modules/freebayes/** - - tests/modules/freebayes/** +freebayes/germline: + - modules/freebayes/germline/** + - tests/modules/freebayes/germline/** + +freebayes/somatic: + - modules/freebayes/somatic/** + - tests/modules/freebayes/somatic/** gatk4/applybqsr: - modules/gatk4/applybqsr/** diff --git a/tests/modules/freebayes/germline/main.nf b/tests/modules/freebayes/germline/main.nf new file mode 100644 index 00000000..1b39eea0 --- /dev/null +++ b/tests/modules/freebayes/germline/main.nf @@ -0,0 +1,51 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { FREEBAYES_GERMLINE } from '../../../../modules/freebayes/germline/main.nf' addParams( options: [:] ) + +workflow test_freebayes { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = [] + samples = [] + populations = [] + cnv = 
[] + + FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv) +} + +workflow test_freebayes_bed { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + samples = [] + populations = [] + cnv = [] + + FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv) +} + +workflow test_freebayes_cram { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = [] + samples = [] + populations = [] + cnv = [] + + FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv) +} diff --git a/tests/modules/freebayes/germline/test.yml b/tests/modules/freebayes/germline/test.yml new file mode 100644 index 00000000..55925b92 --- /dev/null +++ b/tests/modules/freebayes/germline/test.yml @@ -0,0 +1,26 @@ +- name: freebayes germline test_freebayes + command: nextflow run tests/modules/freebayes/germline -entry test_freebayes -c tests/config/nextflow.config + tags: + - freebayes + - freebayes/germline + files: + - path: output/freebayes/test.vcf.gz + md5sum: 1ec210ad27514c7a4140c924dc66d979 + +- name: freebayes germline 
test_freebayes_bed + command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_bed -c tests/config/nextflow.config + tags: + - freebayes + - freebayes/germline + files: + - path: output/freebayes/test.vcf.gz + md5sum: e8923cccd5dac196f72d3d3997a60706 + +- name: freebayes germline test_freebayes_cram + command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_cram -c tests/config/nextflow.config + tags: + - freebayes + - freebayes/germline + files: + - path: output/freebayes/test.vcf.gz + md5sum: cb57a3ed154618e3aa4a5272fcfb7521 diff --git a/tests/modules/freebayes/main.nf b/tests/modules/freebayes/main.nf deleted file mode 100644 index 1c07b821..00000000 --- a/tests/modules/freebayes/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { FREEBAYES } from '../../../modules/freebayes/main.nf' addParams( options: [:] ) - -workflow test_freebayes { - - input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)] - reference = [file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)] - targets = [] - samples = [] - populations = [] - cnv = [] - - FREEBAYES ( input, reference, targets, samples, populations, cnv) -} - -workflow test_freebayes_bed { - - input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)] - reference = [file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], 
checkIfExists: true)] - targets = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) - samples = [] - populations = [] - cnv = [] - - FREEBAYES ( input, reference, targets, samples, populations, cnv) -} diff --git a/tests/modules/freebayes/somatic/main.nf b/tests/modules/freebayes/somatic/main.nf new file mode 100644 index 00000000..d26caf34 --- /dev/null +++ b/tests/modules/freebayes/somatic/main.nf @@ -0,0 +1,37 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { FREEBAYES_SOMATIC } from '../../../../modules/freebayes/somatic/main.nf' addParams( options: [:] ) + +workflow test_freebayes { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = [] + samples = [] + + FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples) +} + +workflow test_freebayes_intervals { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + samples = [] + + FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples) +} diff --git a/tests/modules/freebayes/somatic/test.yml b/tests/modules/freebayes/somatic/test.yml new file mode 100644 index 00000000..93113e60 --- /dev/null +++ b/tests/modules/freebayes/somatic/test.yml @@ -0,0 +1,17 @@ +- name: freebayes somatic test_freebayes + command: nextflow run tests/modules/freebayes/somatic -entry test_freebayes -c tests/config/nextflow.config + tags: + - freebayes/somatic + - freebayes + files: + - path: output/freebayes/test.vcf.gz + md5sum: 1c47d02f27ec5918558c8688ce6e7780 + +- name: freebayes somatic test_freebayes_intervals + command: nextflow run tests/modules/freebayes/somatic -entry test_freebayes_intervals -c tests/config/nextflow.config + tags: + - freebayes/somatic + - freebayes + files: + - path: output/freebayes/test.vcf.gz + md5sum: 5b8a12666bde63746dcec7afcd3ef789 diff --git a/tests/modules/freebayes/test.yml b/tests/modules/freebayes/test.yml deleted file mode 100644 index 9ca54021..00000000 --- a/tests/modules/freebayes/test.yml +++ /dev/null @@ -1,14 +0,0 @@ -- name: freebayes test_freebayes - command: nextflow run tests/modules/freebayes -entry test_freebayes -c tests/config/nextflow.config - tags: - - freebayes - files: - - path: output/freebayes/test.vcf.gz - -- name: freebayes test_freebayes_bed - command: nextflow run tests/modules/freebayes -entry test_freebayes_bed -c tests/config/nextflow.config - tags: - - freebayes - files: - - path: output/freebayes/test.vcf.gz - From 7afb962f0bd5d9b504331082fa4bafd498074a4b Mon Sep 17 00:00:00 2001 From: GCJMackenzie <43276267+GCJMackenzie@users.noreply.github.com> Date: Fri, 29 Oct 2021 17:21:34 +0100 Subject: [PATCH 
05/15] New module genomicsdbimport (#857) * saving changes to checkout * saving to sort out other branch * removed yml tracking of files that cant be tracked due to directory name changing between runs * test data added, ready for pr * fix eol linting error * Update modules/gatk4/genomicsdbimport/main.nf Co-authored-by: Francesco L <53608000+lescai@users.noreply.github.com> * merging with master * update push to show progress * tests now working untar able to pass data to genomicsdbimport * commit to checkout * tests updated, module reworked to simplify and emit updated gendb * Apply suggestions from code review Co-authored-by: Harshil Patel * update meta.yml Priority of input options changed, updated to reflect this * Update test.yml name prefix changed in main script, test.yml updated to reflect this * fix tests due to review changes Co-authored-by: GCJMackenzie Co-authored-by: Francesco L <53608000+lescai@users.noreply.github.com> Co-authored-by: Harshil Patel --- modules/gatk4/genomicsdbimport/functions.nf | 78 ++++++++++++++++++ modules/gatk4/genomicsdbimport/main.nf | 67 ++++++++++++++++ modules/gatk4/genomicsdbimport/meta.yml | 80 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 + tests/config/test_data.config | 2 + tests/modules/gatk4/genomicsdbimport/main.nf | 61 ++++++++++++++ tests/modules/gatk4/genomicsdbimport/test.yml | 50 ++++++++++++ 7 files changed, 342 insertions(+) create mode 100644 modules/gatk4/genomicsdbimport/functions.nf create mode 100644 modules/gatk4/genomicsdbimport/main.nf create mode 100644 modules/gatk4/genomicsdbimport/meta.yml create mode 100644 tests/modules/gatk4/genomicsdbimport/main.nf create mode 100644 tests/modules/gatk4/genomicsdbimport/test.yml diff --git a/modules/gatk4/genomicsdbimport/functions.nf b/modules/gatk4/genomicsdbimport/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/gatk4/genomicsdbimport/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 
module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/gatk4/genomicsdbimport/main.nf b/modules/gatk4/genomicsdbimport/main.nf new file mode 100644 index 00000000..aa4fceb0 --- /dev/null +++ b/modules/gatk4/genomicsdbimport/main.nf @@ -0,0 +1,67 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GATK4_GENOMICSDBIMPORT { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + } else { + container "quay.io/biocontainers/gatk4:4.2.0.0--0" + } + + input: + tuple val(meta), path(vcf), path(tbi), path(intervalfile), val(intervalval), path(wspace) + val run_intlist + val run_updatewspace + val input_map + + output: + tuple val(meta), path("*_genomicsdb") , optional:true, emit: genomicsdb + tuple val(meta), path("$updated_db") , optional:true, emit: updatedb + tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" + + // default (create gendb) mode: a sample map is passed via --sample-name-map, otherwise every VCF becomes its own space-separated '-V FILE' argument + def inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V ')}" + def dir_command = "--genomicsdb-workspace-path ${prefix}" + def intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} " + + // get intervals list mode (run_intlist is true): no VCF inputs, read the existing workspace and write ${prefix}.interval_list + if (run_intlist) { + inputs_command = '' + dir_command = "--genomicsdb-update-workspace-path ${wspace}" + intervals_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // update gendb mode, applied after (and overriding) the run_intlist settings for dir_command and intervals_command; setting updated_db makes the module emit the updated gendb + if (run_updatewspace) { + dir_command = "--genomicsdb-update-workspace-path ${wspace}" + intervals_command = '' + updated_db = wspace.toString() + } + + """ + gatk GenomicsDBImport \\ + $inputs_command \\ + $dir_command \\ + $intervals_command \\ + $options.args + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/genomicsdbimport/meta.yml b/modules/gatk4/genomicsdbimport/meta.yml new file mode 100644 index 00000000..f7a32e7e --- /dev/null +++ b/modules/gatk4/genomicsdbimport/meta.yml @@ -0,0 +1,80 @@ +name: gatk4_genomicsdbimport +description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. 
+keywords: + - gatk4 + - genomicsdbimport + - genomicsdb + - panelofnormalscreation + - jointgenotyping +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. 
Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. + pattern: "*.vcf.gz" + + - tbi: + type: list + description: list of tbi files that match with the input vcf files + pattern: "*.vcf.gz.tbi" + + - wspace: + type: path + description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. + pattern: "/path/to/existing/gendb" + + - intervalfile: + type: file + description: file containing the intervals to be used when creating the genomicsdb + pattern: "*.interval_list" + + - intervalval: + type: string + description: if an intervals file has not been specified, the value entered here will be used as an interval via the "-L" argument + pattern: "example: chr1:1000-10000" + + - run_intlist: + type: boolean + description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. + pattern: "true/false" + + - run_updatewspace: + type: boolean + description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. + pattern: "true/false" + + - input_map: + type: boolean + description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. 
+ pattern: "*.sample_map" + +output: + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db + pattern: "*_genomicsdb" + - intervallist: + type: file + description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode. + pattern: "*.interval_list" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d7fed0c0..db10b55b 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -450,6 +450,10 @@ gatk4/fastqtosam: - modules/gatk4/fastqtosam/** - tests/modules/gatk4/fastqtosam/** +gatk4/genomicsdbimport: + - modules/gatk4/genomicsdbimport/** + - tests/modules/gatk4/genomicsdbimport/** + gatk4/filtermutectcalls: - modules/gatk4/filtermutectcalls/** - tests/modules/gatk4/filtermutectcalls/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index c05e1c8f..d0489e03 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -103,6 +103,7 @@ params { genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict" genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" + genome_interval_list = "${test_data_dir}/genomics/homo_sapiens/genome/genome.interval_list" genome_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes" genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed" genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header" @@ -181,6 +182,7 @@ params { test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" test_pileups_table = 
"${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" test_test2_paired_mutect2_calls_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz" test_test2_paired_mutect2_calls_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi" diff --git a/tests/modules/gatk4/genomicsdbimport/main.nf b/tests/modules/gatk4/genomicsdbimport/main.nf new file mode 100644 index 00000000..ef67b04a --- /dev/null +++ b/tests/modules/gatk4/genomicsdbimport/main.nf @@ -0,0 +1,61 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { UNTAR } from '../../../../modules/untar/main.nf' addParams( options: [:] ) +include { GATK4_GENOMICSDBIMPORT } from '../../../../modules/gatk4/genomicsdbimport/main.nf' addParams( options: [:] ) + +workflow test_gatk4_genomicsdbimport_create_genomicsdb { + + input = [ [ id:'test_genomicsdb'], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) , + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) , + file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) , + [] , + [] ] + + run_intlist = false + run_updatewspace = false + input_map = false + + GATK4_GENOMICSDBIMPORT ( input, run_intlist, run_updatewspace, input_map ) +} + +workflow test_gatk4_genomicsdbimport_get_intervalslist { + db = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) + + UNTAR ( db ) + + def input = Channel.of([ [ id:'test_genomicsdb'], // meta map + [] , + [] , + [] , + [] ]) + .combine(UNTAR.out.untar) + + run_intlist = true + 
run_updatewspace = false + input_map = false + + GATK4_GENOMICSDBIMPORT ( input, run_intlist, run_updatewspace, input_map ) +} + +workflow test_gatk4_genomicsdbimport_update_genomicsdb { + db = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) + + UNTAR ( db ) + + def input = Channel.of([ [ id:'test_genomicsdb'], // meta map + file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz'] , checkIfExists: true) , + file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'] , checkIfExists: true) , + [] , + [] ]) + .combine(UNTAR.out.untar) + + run_intlist = false + run_updatewspace = true + input_map = false + + GATK4_GENOMICSDBIMPORT ( input, run_intlist, run_updatewspace, input_map ) + +} diff --git a/tests/modules/gatk4/genomicsdbimport/test.yml b/tests/modules/gatk4/genomicsdbimport/test.yml new file mode 100644 index 00000000..68f5ae7a --- /dev/null +++ b/tests/modules/gatk4/genomicsdbimport/test.yml @@ -0,0 +1,50 @@ +- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_create_genomicsdb + command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_create_genomicsdb -c tests/config/nextflow.config + tags: + - gatk4/genomicsdbimport + - gatk4 + files: + - path: output/gatk4/test_genomicsdb/__tiledb_workspace.tdb + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/gatk4/test_genomicsdb/callset.json + md5sum: a7d07d1c86449bbb1091ff29368da07a + - path: output/gatk4/test_genomicsdb/chr22$1$40001/.__consolidation_lock + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/gatk4/test_genomicsdb/chr22$1$40001/__array_schema.tdb + - path: output/gatk4/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json + md5sum: 2502f79658bc000578ebcfddfc1194c0 + - path: output/gatk4/test_genomicsdb/vcfheader.vcf + contains: + - "FORMAT= Date: Fri, 29 Oct 2021 17:45:32 +0100 Subject: [PATCH 06/15] FIX: Createsomaticpanelofnormals 
add requested test changes (#983) * files created for createsompon, script written, meta written, still needs tests * updated to 2.0.0 method input, however this requires a genomicsDB input now * script finished, meta yaml updated. Tests working locally, test yaml made, needs genomicsdb example on nf-core to run repository tests * versions updated, issue with test data not able to download directory * updated tests to include repo-side data * Apply suggestions from code review * Update modules/gatk4/createsomaticpanelofnormals/main.nf * temp commit to allow checkout * updated createsompon tests to use tarred gendb * resolve conflict * Update tests/modules/gatk4/createsomaticpanelofnormals/main.nf Co-authored-by: Harshil Patel Co-authored-by: GCJMackenzie Co-authored-by: Harshil Patel --- .../gatk4/createsomaticpanelofnormals/main.nf | 61 ++----------------- 1 file changed, 5 insertions(+), 56 deletions(-) diff --git a/tests/modules/gatk4/createsomaticpanelofnormals/main.nf b/tests/modules/gatk4/createsomaticpanelofnormals/main.nf index 34fc9847..6e5366f5 100644 --- a/tests/modules/gatk4/createsomaticpanelofnormals/main.nf +++ b/tests/modules/gatk4/createsomaticpanelofnormals/main.nf @@ -2,67 +2,16 @@ nextflow.enable.dsl = 2 +include { UNTAR } from '../../../../modules/untar/main.nf' addParams( options: [:] ) include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../../modules/gatk4/createsomaticpanelofnormals/main.nf' addParams( options: [suffix:'.pon'] ) workflow test_gatk4_createsomaticpanelofnormals { - maindir = file('test_genomicsdb') - subdir1 = file('test_genomicsdb/chr22$1$40001') - subdir2 = file('test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448') - subdir3 = file('test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir') - subdir2.mkdirs() - subdir3.mkdirs() + db = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) - file( 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/__tiledb_workspace.tdb' , checkIfExists: true).copyTo(maindir) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/vcfheader.vcf' , checkIfExists: true).copyTo(maindir) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/vidmap.json' , checkIfExists: true).copyTo(maindir) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/callset.json' , checkIfExists: true).copyTo(maindir) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/.__consolidation_lock' , checkIfExists: true).copyTo(subdir1) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__array_schema.tdb' , checkIfExists: true).copyTo(subdir1) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json' , checkIfExists: true).copyTo(subdir3) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_meta_2b25a6c2-cb94-4a4a-9005-acb7c595d322.json' , checkIfExists: true).copyTo(subdir3) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/AD.tdb' , checkIfExists: true).copyTo(subdir2) - file( 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/AD_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ALT.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ALT_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/BaseQRankSum.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/DB.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/DP.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/DP_FORMAT.tdb' , checkIfExists: true).copyTo(subdir2) - file( 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/END.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ExcessHet.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/FILTER.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/FILTER_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/GQ.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/GT.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/GT_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ID.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ID_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/InbreedingCoeff.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MIN_DP.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAC.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAC_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAF.tdb' , checkIfExists: true).copyTo(subdir2) - file( 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAF_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MQRankSum.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PGT.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PGT_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PID.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PID_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PL.tdb' , checkIfExists: true).copyTo(subdir2) - file( 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PL_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PS.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/QUAL.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/RAW_MQandDP.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/REF.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/REF_var.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ReadPosRankSum.tdb' , checkIfExists: true).copyTo(subdir2) - file( 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/SB.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/__book_keeping.tdb.gz' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/__coords.tdb' , checkIfExists: true).copyTo(subdir2) - file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/__tiledb_fragment.tdb' , checkIfExists: true).copyTo(subdir2) + UNTAR ( db ) - input = [ [ id:'test' ], // meta map - file( maindir , checkIfExists: true)] + input = Channel.of([ id:'test']) + .combine(UNTAR.out.untar) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) From 9fb26ae46248c33ad858d79a36beb07912c85a89 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Sat, 30 Oct 2021 09:52:13 +0100 Subject: [PATCH 07/15] Add IDR module (#908) * Add IDR module * Add meta and implement main todos * Modifying idr tests * Update tests/config/test_data.config Co-authored-by: Harshil Patel * Update tests/config/test_data.config Co-authored-by: Harshil Patel * Update main.nf * Update tests/config/test_data.config Co-authored-by: Harshil Patel * Update test with new file name Co-authored-by: Jose Espinosa-Carrasco --- modules/idr/functions.nf | 78 
+++++++++++++++++++++++++++++++++ modules/idr/main.nf | 56 +++++++++++++++++++++++ modules/idr/meta.yml | 53 ++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/config/test_data.config | 6 +++ tests/modules/idr/main.nf | 35 +++++++++++++++ tests/modules/idr/test.yml | 35 +++++++++++++++ 7 files changed, 267 insertions(+) create mode 100644 modules/idr/functions.nf create mode 100644 modules/idr/main.nf create mode 100644 modules/idr/meta.yml create mode 100644 tests/modules/idr/main.nf create mode 100644 tests/modules/idr/test.yml diff --git a/modules/idr/functions.nf b/modules/idr/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/idr/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module 
results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/idr/main.nf b/modules/idr/main.nf new file mode 100644 index 00000000..006826ac --- /dev/null +++ b/modules/idr/main.nf @@ -0,0 +1,56 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process IDR { + tag "$prefix" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"bioconda::idr=2.0.4.2" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/idr:2.0.4.2--py39hcbe4a3b_5" + } else { + container "quay.io/biocontainers/idr:2.0.4.2--py38h9af456f_5" + } + + input: + path peaks + val peak_type + val prefix + + output: + path "*idrValues.txt", emit: idr + path "*log.txt" , emit: log + path "*.png" , emit: png + path "versions.yml" , emit: versions + + script: + if (peaks.toList().size < 2) { // abort here rather than launching idr with unusable input + error "[ERROR] idr needs at least two replicates only one provided." + } + def peak_types = ['narrowPeak', 'broadPeak', 'bed'] + if (!peak_types.contains(peak_type)) { // same fail-fast handling for an unsupported peak_type + error "[ERROR] Invalid option: '${peak_type}'. Valid options for 'peak_type': ${peak_types.join(', ')}." + } + def idr_vals = prefix ? "${prefix}.idrValues.txt" : "idrValues.txt" + def log_file = prefix ? "${prefix}.log.txt" : "log.txt" + """ + idr \\ + --samples $peaks \\ + --input-file-type $peak_type \\ + --output-file $idr_vals \\ + --log-output-file $log_file \\ + --plot \\ + $options.args + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(idr --version 2>&1) | sed 's/^.*IDR //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/idr/meta.yml b/modules/idr/meta.yml new file mode 100644 index 00000000..c89e72a4 --- /dev/null +++ b/modules/idr/meta.yml @@ -0,0 +1,53 @@ +name: idr +description: | + Measures reproducibility of ChIP-seq, ATAC-seq peaks using IDR (Irreproducible + Discovery Rate) +keywords: + - IDR + - peaks + - ChIP-seq + - ATAC-seq +tools: + - idr: + description: | + The IDR (Irreproducible Discovery Rate) framework is a unified approach + to measure the reproducibility of findings identified from replicate + experiments and provide highly stable thresholds based on reproducibility. 
+ homepage: None + documentation: None + tool_dev_url: https://github.com/kundajelab/idr + doi: "" + licence: ['GPL v2'] +input: + - peaks: + type: tuple of two files + description: BED, narrowPeak or broadPeak files of replicates + pattern: "*" + - peak_type: + type: value + description: Type of peak file + pattern: "{narrowPeak,broadPeak,bed}" + - prefix: + type: value + description: Prefix for output files +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - idr: + type: file + description: Text file containing IDR values + pattern: "*.{txt}" + - log: + type: file + description: Log file + pattern: "*.{txt}" + - png: + type: file + description: Plot generated by idr + pattern: "*{.png}" + +authors: + - "@drpatelh" + - "@joseespinosa" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index db10b55b..ea999b6a 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -597,6 +597,10 @@ homer/makeucscfile: - modules/homer/makeucscfile/** - tests/modules/homer/makeucscfile/** +idr: + - modules/idr/** + - tests/modules/idr/** + iqtree: - modules/iqtree/** - tests/modules/iqtree/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index d0489e03..12252542 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -202,6 +202,12 @@ params { test2_genome_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi" test2_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx" + test_broadpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak" + test2_broadpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak" + + test_narrowpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak" + test2_narrowpeak = 
"${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak" + test_10x_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_1.fastq.gz" test_10x_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_2.fastq.gz" diff --git a/tests/modules/idr/main.nf b/tests/modules/idr/main.nf new file mode 100644 index 00000000..aa141a57 --- /dev/null +++ b/tests/modules/idr/main.nf @@ -0,0 +1,35 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { IDR } from '../../../modules/idr/main.nf' addParams( options: [:] ) + +workflow test_idr_narrowpeak { + + input = [ + file(params.test_data['homo_sapiens']['illumina']['test_narrowpeak'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_narrowpeak'], checkIfExists: true) + ] + + IDR ( input, 'narrowPeak', 'test' ) +} + +workflow test_idr_broadpeak { + + input = [ + file(params.test_data['homo_sapiens']['illumina']['test_broadpeak'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_broadpeak'], checkIfExists: true) + ] + + IDR ( input, 'broadPeak', 'test' ) +} + +workflow test_idr_noprefix { + + input = [ + file(params.test_data['homo_sapiens']['illumina']['test_narrowpeak'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_narrowpeak'], checkIfExists: true) + ] + + IDR ( input, 'narrowPeak', '' ) +} diff --git a/tests/modules/idr/test.yml b/tests/modules/idr/test.yml new file mode 100644 index 00000000..35ee4bc9 --- /dev/null +++ b/tests/modules/idr/test.yml @@ -0,0 +1,35 @@ +- name: idr test_idr_narrowpeak + command: nextflow run tests/modules/idr -entry test_idr_narrowpeak -c tests/config/nextflow.config + tags: + - idr + files: + - path: output/idr/test.idrValues.txt + md5sum: 09be837cc6abbc3eb5958b74802eea55 + - path: output/idr/test.idrValues.txt.png + md5sum: 4a7143ccc0ccadb37c2317bf626e6d96 + - path: output/idr/test.log.txt + md5sum: 
6443507ac66b9d3b64bc56b78328083e + +- name: idr test_idr_broadpeak + command: nextflow run tests/modules/idr -entry test_idr_broadpeak -c tests/config/nextflow.config + tags: + - idr + files: + - path: output/idr/test.idrValues.txt + md5sum: 387441c716815e4caec3e70a2cc11a4a + - path: output/idr/test.idrValues.txt.png + md5sum: 7204083ca5b920b4215a5991c12cb4e7 + - path: output/idr/test.log.txt + md5sum: e6917133112b5cec135c182ffac19237 + +- name: idr test_idr_noprefix + command: nextflow run tests/modules/idr -entry test_idr_noprefix -c tests/config/nextflow.config + tags: + - idr + files: + - path: output/idr/idrValues.txt + md5sum: 09be837cc6abbc3eb5958b74802eea55 + - path: output/idr/idrValues.txt.png + md5sum: 4a7143ccc0ccadb37c2317bf626e6d96 + - path: output/idr/log.txt + md5sum: 6443507ac66b9d3b64bc56b78328083e From 81d65d4978a988b20ac21abf6a99c1baf936be65 Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Mon, 1 Nov 2021 12:12:14 +0000 Subject: [PATCH 08/15] Samtools fixmate module (#991) * Samtools fixmate module * Update modules/samtools/fixmate/meta.yml Co-authored-by: James A. Fellows Yates * Update modules/samtools/fixmate/main.nf Co-authored-by: James A. Fellows Yates * Update main.nf * Update modules/samtools/fixmate/meta.yml Co-authored-by: James A. Fellows Yates * Update meta.yml Co-authored-by: Simon Pearce Co-authored-by: James A. 
Fellows Yates --- modules/samtools/fixmate/functions.nf | 78 +++++++++++++++++++++++++ modules/samtools/fixmate/main.nf | 45 ++++++++++++++ modules/samtools/fixmate/meta.yml | 49 ++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/samtools/fixmate/main.nf | 14 +++++ tests/modules/samtools/fixmate/test.yml | 8 +++ 6 files changed, 198 insertions(+) create mode 100644 modules/samtools/fixmate/functions.nf create mode 100644 modules/samtools/fixmate/main.nf create mode 100644 modules/samtools/fixmate/meta.yml create mode 100644 tests/modules/samtools/fixmate/main.nf create mode 100644 tests/modules/samtools/fixmate/test.yml diff --git a/modules/samtools/fixmate/functions.nf b/modules/samtools/fixmate/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/samtools/fixmate/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { 
it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/samtools/fixmate/main.nf b/modules/samtools/fixmate/main.nf new file mode 100644 index 00000000..e1a766a1 --- /dev/null +++ b/modules/samtools/fixmate/main.nf @@ -0,0 +1,45 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SAMTOOLS_FIXMATE { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::samtools=1.14" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0" + } else { + container "quay.io/biocontainers/samtools:1.14--hb421002_0" + } + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use the suffix option to disambiguate!" + + """ + samtools \\ + fixmate \\ + $options.args \\ + -@ $task.cpus \\ + $bam \\ + ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/samtools/fixmate/meta.yml b/modules/samtools/fixmate/meta.yml new file mode 100644 index 00000000..2cec6e7c --- /dev/null +++ b/modules/samtools/fixmate/meta.yml @@ -0,0 +1,49 @@ +name: samtools_fixmate +description: Samtools fixmate is a tool that can fill in information (insert size, cigar, mapq) about paired end reads onto the corresponding other read. Also has options to remove secondary/unmapped alignments and recalculate whether reads are proper pairs. +keywords: + - fixmate + - samtools + - insert size + - repair + - bam + - paired + - read pairs +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ['MIT'] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file, must be sorted by name, not coordinate + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: A BAM/CRAM/SAM file with mate information added and/or proper pairs recalled + pattern: "*.{bam,cram,sam}" + +authors: + - "@sppearce" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index ea999b6a..6ec2d506 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1027,6 +1027,10 @@ samtools/fastq: - modules/samtools/fastq/** - tests/modules/samtools/fastq/** +samtools/fixmate: + - modules/samtools/fixmate/** + - tests/modules/samtools/fixmate/** + samtools/flagstat: - modules/samtools/flagstat/** - tests/modules/samtools/flagstat/** diff --git a/tests/modules/samtools/fixmate/main.nf b/tests/modules/samtools/fixmate/main.nf new file mode 100644 index 00000000..5174beab --- /dev/null +++ b/tests/modules/samtools/fixmate/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SAMTOOLS_FIXMATE } from '../../../../modules/samtools/fixmate/main.nf' addParams( options: [args:'-r -c -m'] ) + +workflow test_samtools_fixmate { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) ] + + SAMTOOLS_FIXMATE ( input ) + +} diff --git a/tests/modules/samtools/fixmate/test.yml 
b/tests/modules/samtools/fixmate/test.yml new file mode 100644 index 00000000..c7864c04 --- /dev/null +++ b/tests/modules/samtools/fixmate/test.yml @@ -0,0 +1,8 @@ +- name: samtools fixmate test_samtools_fixmate + command: nextflow run tests/modules/samtools/fixmate -entry test_samtools_fixmate -c tests/config/nextflow.config + tags: + - samtools/fixmate + - samtools + files: + - path: output/samtools/test.bam + md5sum: 92c8463710cdcaef2010aa02ed9e01fd From cac6dc83bb09d7f33640692fa37d4cb9087abf00 Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Tue, 2 Nov 2021 09:59:25 +0000 Subject: [PATCH 09/15] gccounter module for hmmcopy (#1003) * hmmcopy gccounter working * Update modules/hmmcopy/gccounter/main.nf Co-authored-by: Chris Cheshire * Update main.nf Changed version to 0.1.1 as the container says Co-authored-by: Simon Pearce Co-authored-by: Chris Cheshire --- modules/hmmcopy/gccounter/functions.nf | 78 ++++++++++++++++++++++++ modules/hmmcopy/gccounter/main.nf | 40 ++++++++++++ modules/hmmcopy/gccounter/meta.yml | 33 ++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/hmmcopy/gccounter/main.nf | 11 ++++ tests/modules/hmmcopy/gccounter/test.yml | 8 +++ 6 files changed, 174 insertions(+) create mode 100644 modules/hmmcopy/gccounter/functions.nf create mode 100644 modules/hmmcopy/gccounter/main.nf create mode 100644 modules/hmmcopy/gccounter/meta.yml create mode 100644 tests/modules/hmmcopy/gccounter/main.nf create mode 100644 tests/modules/hmmcopy/gccounter/test.yml diff --git a/modules/hmmcopy/gccounter/functions.nf b/modules/hmmcopy/gccounter/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/hmmcopy/gccounter/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return 
task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/hmmcopy/gccounter/main.nf b/modules/hmmcopy/gccounter/main.nf new file mode 100644 index 00000000..6e7bc11f --- /dev/null +++ b/modules/hmmcopy/gccounter/main.nf @@ -0,0 +1,40 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +def VERSION = '0.1.1' + +process HMMCOPY_GCCOUNTER { + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"bioconda::hmmcopy=0.1.1" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/hmmcopy:0.1.1--h2e03b76_5" + } else { + container "quay.io/biocontainers/hmmcopy:0.1.1--h2e03b76_5" + } + + input: + path fasta + + output: + path "*.gc.wig" , emit: wig + path "versions.yml", emit: versions + + script: + """ + gcCounter \\ + $options.args \\ + ${fasta} > ${fasta.baseName}.gc.wig + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo $VERSION) + END_VERSIONS + """ +} diff --git a/modules/hmmcopy/gccounter/meta.yml b/modules/hmmcopy/gccounter/meta.yml new file mode 100644 index 00000000..71727af2 --- /dev/null +++ b/modules/hmmcopy/gccounter/meta.yml @@ -0,0 +1,33 @@ +name: hmmcopy_gccounter +description: gcCounter function from HMMcopy utilities, used to generate GC content in non-overlapping windows from a fasta reference +keywords: + - hmmcopy + - gccounter + - cnv +tools: + - hmmcopy: + description: C++ based programs for analyzing BAM files and preparing read counts -- used with bioconductor-hmmcopy + homepage: https://github.com/shahcompbio/hmmcopy_utils + documentation: https://github.com/shahcompbio/hmmcopy_utils + tool_dev_url: https://github.com/shahcompbio/hmmcopy_utils + doi: "" + licence: ['GPL v3'] + +input: + - fasta: + type: file + description: Input genome fasta file + + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - wig: + type: file + description: wig file containing gc content of each window of the genome + pattern: "*.{gc.wig}" + +authors: + - "@sppearce" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 6ec2d506..4e4526b1 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -577,6 +577,10 @@ hisat2/extractsplicesites: - 
modules/hisat2/extractsplicesites/** - tests/modules/hisat2/extractsplicesites/** +hmmcopy/gccounter: + - modules/hmmcopy/gccounter/** + - tests/modules/hmmcopy/gccounter/** + hmmer/hmmalign: - modules/hmmer/hmmalign/** - tests/modules/hmmer/hmmalign/** diff --git a/tests/modules/hmmcopy/gccounter/main.nf b/tests/modules/hmmcopy/gccounter/main.nf new file mode 100644 index 00000000..30846ca9 --- /dev/null +++ b/tests/modules/hmmcopy/gccounter/main.nf @@ -0,0 +1,11 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { HMMCOPY_GCCOUNTER } from '../../../../modules/hmmcopy/gccounter/main.nf' addParams( options: [:] ) + +workflow test_hmmcopy_gccounter { + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + HMMCOPY_GCCOUNTER (fasta) +} diff --git a/tests/modules/hmmcopy/gccounter/test.yml b/tests/modules/hmmcopy/gccounter/test.yml new file mode 100644 index 00000000..edcd6b92 --- /dev/null +++ b/tests/modules/hmmcopy/gccounter/test.yml @@ -0,0 +1,8 @@ +- name: hmmcopy gccounter test_hmmcopy_gccounter + command: nextflow run tests/modules/hmmcopy/gccounter -entry test_hmmcopy_gccounter -c tests/config/nextflow.config + tags: + - hmmcopy + - hmmcopy/gccounter + files: + - path: output/hmmcopy/genome.gc.wig + md5sum: 59ad14bc5aaa903187d7b248c9490deb From 374d81e0b39d24ec2ef84b6f931c74ded5e3e682 Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Tue, 2 Nov 2021 11:01:13 +0000 Subject: [PATCH 10/15] readcounter module for hmmcopy (#1001) * readcounter module for hmmcopy * Changed version number * Fix indentation * Update main.nf * Update modules/hmmcopy/readcounter/main.nf Co-authored-by: Chris Cheshire Co-authored-by: Simon Pearce Co-authored-by: Chris Cheshire --- modules/hmmcopy/readcounter/functions.nf | 78 ++++++++++++++++++++++ modules/hmmcopy/readcounter/main.nf | 42 ++++++++++++ modules/hmmcopy/readcounter/meta.yml | 43 ++++++++++++ tests/config/pytest_modules.yml | 4 
++ tests/modules/hmmcopy/readcounter/main.nf | 14 ++++ tests/modules/hmmcopy/readcounter/test.yml | 8 +++ 6 files changed, 189 insertions(+) create mode 100644 modules/hmmcopy/readcounter/functions.nf create mode 100644 modules/hmmcopy/readcounter/main.nf create mode 100644 modules/hmmcopy/readcounter/meta.yml create mode 100644 tests/modules/hmmcopy/readcounter/main.nf create mode 100644 tests/modules/hmmcopy/readcounter/test.yml diff --git a/modules/hmmcopy/readcounter/functions.nf b/modules/hmmcopy/readcounter/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/hmmcopy/readcounter/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = 
initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/hmmcopy/readcounter/main.nf b/modules/hmmcopy/readcounter/main.nf new file mode 100644 index 00000000..9e3e72a7 --- /dev/null +++ b/modules/hmmcopy/readcounter/main.nf @@ -0,0 +1,42 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +def VERSION = '0.1.1' + +process HMMCOPY_READCOUNTER { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::hmmcopy=0.1.1" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/hmmcopy:0.1.1--h2e03b76_5" + } else { + container "quay.io/biocontainers/hmmcopy:0.1.1--h2e03b76_5" + } + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.wig"), emit: wig + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + readCounter \\ + $options.args \\ + ${bam} > ${prefix}.wig + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo $VERSION) + END_VERSIONS + """ +} diff --git a/modules/hmmcopy/readcounter/meta.yml b/modules/hmmcopy/readcounter/meta.yml new file mode 100644 index 00000000..9b09a55c --- /dev/null +++ b/modules/hmmcopy/readcounter/meta.yml @@ -0,0 +1,43 @@ +name: hmmcopy_readcounter +description: readCounter function from HMMcopy utilities, used to generate read counts in windows +keywords: + - hmmcopy + - readcounter + - cnv +tools: + - hmmcopy: + description: C++ based programs for analyzing BAM files and preparing read counts -- used with bioconductor-hmmcopy + homepage: https://github.com/shahcompbio/hmmcopy_utils + documentation: https://github.com/shahcompbio/hmmcopy_utils + tool_dev_url: https://github.com/shahcompbio/hmmcopy_utils + doi: "" + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - wig: + type: file + description: A wig file with the number of reads lying within each window in each chromosome + pattern: "*.wig" + +authors: + - "@sppearce" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 4e4526b1..9320245f 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -581,6 +581,10 @@ hmmcopy/gccounter: - modules/hmmcopy/gccounter/** - tests/modules/hmmcopy/gccounter/** +hmmcopy/readcounter: + - modules/hmmcopy/readcounter/** + - tests/modules/hmmcopy/readcounter/** + hmmer/hmmalign: - modules/hmmer/hmmalign/** - tests/modules/hmmer/hmmalign/** diff --git a/tests/modules/hmmcopy/readcounter/main.nf b/tests/modules/hmmcopy/readcounter/main.nf new file mode 100644 index 00000000..9025f98e --- /dev/null +++ b/tests/modules/hmmcopy/readcounter/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { HMMCOPY_READCOUNTER } from '../../../../modules/hmmcopy/readcounter/main.nf' addParams( options: [:] ) + +workflow test_hmmcopy_readcounter { + + input = [ [ id:'test'], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)] + ] + HMMCOPY_READCOUNTER ( input ) +} diff --git a/tests/modules/hmmcopy/readcounter/test.yml b/tests/modules/hmmcopy/readcounter/test.yml new file mode 100644 index 00000000..6c00ee08 --- /dev/null +++ b/tests/modules/hmmcopy/readcounter/test.yml @@ -0,0 +1,8 @@ +- name: hmmcopy readcounter test_hmmcopy_readcounter + command: nextflow run tests/modules/hmmcopy/readcounter -entry test_hmmcopy_readcounter -c tests/config/nextflow.config + tags: + - hmmcopy + - hmmcopy/readcounter + files: + - path: 
output/hmmcopy/test.wig + md5sum: 3655d8325baea81b3b690791262c6b57 From 4619d012e562a224b69d9c09d51e12e4394f3238 Mon Sep 17 00:00:00 2001 From: fbdtemme <69114541+fbdtemme@users.noreply.github.com> Date: Tue, 2 Nov 2021 17:00:25 +0100 Subject: [PATCH 11/15] Add cram support to Allelecounter module (#1013) * Add CRAM support to allelecounter * Update meta.yml * Rename bam,bai to input,input_index * Apply suggestions from code review * Fix reference to renamed variable Co-authored-by: Maxime U. Garcia --- modules/allelecounter/main.nf | 8 ++++++-- modules/allelecounter/meta.yml | 9 ++++++--- tests/modules/allelecounter/main.nf | 16 ++++++++++++++-- tests/modules/allelecounter/test.yml | 12 ++++++++++-- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/modules/allelecounter/main.nf b/modules/allelecounter/main.nf index 5184df7d..8d986579 100644 --- a/modules/allelecounter/main.nf +++ b/modules/allelecounter/main.nf @@ -19,8 +19,9 @@ process ALLELECOUNTER { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path loci + path fasta output: tuple val(meta), path("*.alleleCount"), emit: allelecount @@ -28,11 +29,14 @@ process ALLELECOUNTER { script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def reference_options = fasta ? "-r $fasta": "" + """ alleleCounter \\ $options.args \\ -l $loci \\ - -b $bam \\ + -b $input \\ + $reference_options \\ -o ${prefix}.alleleCount cat <<-END_VERSIONS > versions.yml diff --git a/modules/allelecounter/meta.yml b/modules/allelecounter/meta.yml index a15f3eac..7d921e12 100644 --- a/modules/allelecounter/meta.yml +++ b/modules/allelecounter/meta.yml @@ -19,11 +19,11 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - bai: + - input_index: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" @@ -31,7 +31,9 @@ input: type: file description: loci file pattern: "*.{tsv}" - + - fasta: + type: file + description: Input genome fasta file. Required when passing CRAM files. output: - meta: @@ -50,3 +52,4 @@ output: authors: - "@fullama" + - "@fbdtemme" diff --git a/tests/modules/allelecounter/main.nf b/tests/modules/allelecounter/main.nf index 542529c2..b938ab94 100644 --- a/tests/modules/allelecounter/main.nf +++ b/tests/modules/allelecounter/main.nf @@ -3,12 +3,24 @@ nextflow.enable.dsl = 2 include { ALLELECOUNTER } from '../../../modules/allelecounter/main.nf' addParams( options: [:] ) -workflow test_allelecounter { +workflow test_allelecounter_bam { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] positions = [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] - ALLELECOUNTER ( input, positions ) + ALLELECOUNTER ( input, positions, [] ) +} + + +workflow test_allelecounter_cram { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + positions = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + + ALLELECOUNTER ( input, positions, fasta ) } diff --git a/tests/modules/allelecounter/test.yml b/tests/modules/allelecounter/test.yml index 
7ed71559..bbef0ecc 100644 --- a/tests/modules/allelecounter/test.yml +++ b/tests/modules/allelecounter/test.yml @@ -1,7 +1,15 @@ -- name: allelecounter test_allelecounter - command: nextflow run tests/modules/allelecounter -entry test_allelecounter -c tests/config/nextflow.config +- name: allelecounter test_allelecounter_bam + command: nextflow run tests/modules/allelecounter -entry test_allelecounter_bam -c tests/config/nextflow.config tags: - allelecounter files: - path: output/allelecounter/test.alleleCount md5sum: 2bbe9d7331b78bdac30fe30dbc5fdaf3 + +- name: allelecounter test_allelecounter_cram + command: nextflow run tests/modules/allelecounter -entry test_allelecounter_cram -c tests/config/nextflow.config + tags: + - allelecounter + files: + - path: output/allelecounter/test.alleleCount + md5sum: 2f83352a185168c7c98e9e42550b2856 From 9767b081b9d808d7b6671d34d48c70afb436a5f1 Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Wed, 3 Nov 2021 11:22:06 +0100 Subject: [PATCH 12/15] Merge freebayes subtools (#1015) * feat: merge freebayes subtools * fix: typo * assess comments from review * fix: path to module --- modules/freebayes/{germline => }/functions.nf | 0 modules/freebayes/{germline => }/main.nf | 7 +- modules/freebayes/{germline => }/meta.yml | 9 +- modules/freebayes/somatic/functions.nf | 78 --------------- modules/freebayes/somatic/main.nf | 74 --------------- modules/freebayes/somatic/meta.yml | 66 ------------- tests/config/pytest_modules.yml | 10 +- tests/modules/freebayes/germline/main.nf | 51 ---------- tests/modules/freebayes/germline/test.yml | 26 ----- tests/modules/freebayes/main.nf | 95 +++++++++++++++++++ tests/modules/freebayes/somatic/main.nf | 37 -------- tests/modules/freebayes/somatic/test.yml | 17 ---- tests/modules/freebayes/test.yml | 39 ++++++++ 13 files changed, 148 insertions(+), 361 deletions(-) rename modules/freebayes/{germline => }/functions.nf (100%) rename modules/freebayes/{germline => }/main.nf (91%) rename 
modules/freebayes/{germline => }/meta.yml (94%) delete mode 100644 modules/freebayes/somatic/functions.nf delete mode 100644 modules/freebayes/somatic/main.nf delete mode 100644 modules/freebayes/somatic/meta.yml delete mode 100644 tests/modules/freebayes/germline/main.nf delete mode 100644 tests/modules/freebayes/germline/test.yml create mode 100644 tests/modules/freebayes/main.nf delete mode 100644 tests/modules/freebayes/somatic/main.nf delete mode 100644 tests/modules/freebayes/somatic/test.yml create mode 100644 tests/modules/freebayes/test.yml diff --git a/modules/freebayes/germline/functions.nf b/modules/freebayes/functions.nf similarity index 100% rename from modules/freebayes/germline/functions.nf rename to modules/freebayes/functions.nf diff --git a/modules/freebayes/germline/main.nf b/modules/freebayes/main.nf similarity index 91% rename from modules/freebayes/germline/main.nf rename to modules/freebayes/main.nf index eae62036..0b23dc40 100644 --- a/modules/freebayes/germline/main.nf +++ b/modules/freebayes/main.nf @@ -4,7 +4,7 @@ include { initOptions; saveFiles; getProcessName; getSoftwareName } from './func params.options = [:] options = initOptions(params.options) -process FREEBAYES_GERMLINE { +process FREEBAYES { tag "$meta.id" label 'process_low' publishDir "${params.outdir}", @@ -19,7 +19,7 @@ process FREEBAYES_GERMLINE { } input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(input_1), path(input_1_index), path(input_2), path(input_2_index) path fasta path fai path targets @@ -33,6 +33,7 @@ process FREEBAYES_GERMLINE { script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def input = input_2 ? "${input_1} ${input_2}" : "${input_1}" def targets_file = targets ? "--target ${targets}" : "" def samples_file = samples ? "--samples ${samples}" : "" def populations_file = populations ? 
"--populations ${populations}" : "" @@ -48,7 +49,7 @@ process FREEBAYES_GERMLINE { $populations_file \\ $cnv_file \\ $options.args \\ - $input > ${prefix}.vcf + $input > ${prefix}.vcf gzip --no-name ${prefix}.vcf diff --git a/modules/freebayes/germline/meta.yml b/modules/freebayes/meta.yml similarity index 94% rename from modules/freebayes/germline/meta.yml rename to modules/freebayes/meta.yml index 86650715..75d44826 100644 --- a/modules/freebayes/germline/meta.yml +++ b/modules/freebayes/meta.yml @@ -1,11 +1,14 @@ -name: freebayes_germline +name: freebayes description: A haplotype-based variant detector keywords: - variant caller - SNP - genotyping - - variant calling + - somatic variant calling + - germline variant calling + - bacterial variant calling - bayesian + tools: - freebayes: description: Bayesian haplotype-based polymorphism discovery and genotyping @@ -73,6 +76,8 @@ output: type: file description: Compressed VCF file pattern: "*.vcf.gz" + authors: - "@maxibor" - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/freebayes/somatic/functions.nf b/modules/freebayes/somatic/functions.nf deleted file mode 100644 index 85628ee0..00000000 --- a/modules/freebayes/somatic/functions.nf +++ /dev/null @@ -1,78 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Extract name of module from process name using $task.process -// -def getProcessName(task_process) { - return task_process.tokenize(':')[-1] -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - 
options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - - // Do not publish versions.yml unless running from pytest workflow - if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { - return null - } - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? 
path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } -} diff --git a/modules/freebayes/somatic/main.nf b/modules/freebayes/somatic/main.nf deleted file mode 100644 index c1579661..00000000 --- a/modules/freebayes/somatic/main.nf +++ /dev/null @@ -1,74 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getProcessName; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process FREEBAYES_SOMATIC { - tag "$meta.id" - label 'process_low' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? "bioconda::freebayes=1.3.5" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3" - } else { - container "quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3" - } - - input: - tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor) - path fasta - path fai - path targets - path samples - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - path "versions.yml" , emit: versions - - script: - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def targets_file = targets ? "--target ${targets}" : "" - def samples_file = samples ? 
"--samples ${samples}" : "" - - if (task.cpus > 1) { - """ - freebayes-parallel \\ - <(fasta_generate_regions.py ${fasta}.fai 10000) ${task.cpus} \\ - -f $fasta \\ - $targets_file \\ - $samples_file \\ - $options.args \\ - $input_tumor \\ - $input_normal > ${prefix}.vcf - - gzip --no-name ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - ${getProcessName(task.process)}: - ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ - - } else { - """ - freebayes \\ - -f $fasta \\ - $targets_file \\ - $samples_file \\ - $options.args \\ - $input_tumor \\ - $input_normal > ${prefix}.vcf - - gzip --no-name ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - ${getProcessName(task.process)}: - ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ - } -} diff --git a/modules/freebayes/somatic/meta.yml b/modules/freebayes/somatic/meta.yml deleted file mode 100644 index 391e5007..00000000 --- a/modules/freebayes/somatic/meta.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: freebayes_somatic -description: A haplotype-based variant detector -keywords: - - variant caller - - SNP - - genotyping - - somatic variant calling - - bayesian - -tools: - - freebayes: - description: Bayesian haplotype-based polymorphism discovery and genotyping - homepage: https://github.com/freebayes/freebayes - documentation: https://github.com/freebayes/freebayes - tool_dev_url: https://github.com/freebayes/freebayes - doi: "arXiv:1207.3907" - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.bam.bai" - - fasta: - type: file - description: reference fasta file - pattern: ".{fa,fa.gz,fasta,fasta.gz}" - - fai: - type: file - description: reference fasta file index - pattern: "*.fai" - - targets: - type: file - description: Optional - Limit analysis to targets listed in this BED-format FILE. - pattern: "*.bed" - - samples: - type: file - description: Optional - Limit analysis to samples listed (one per line) in the FILE. - pattern: "*.txt" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - -authors: - - "@FriederikeHanssen" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 9320245f..047f83ff 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -410,13 +410,9 @@ flash: - modules/flash/** - tests/modules/flash/** -freebayes/germline: - - modules/freebayes/germline/** - - tests/modules/freebayes/germline/** - -freebayes/somatic: - - modules/freebayes/somatic/** - - tests/modules/freebayes/somatic/** +freebayes: + - modules/freebayes/** + - tests/modules/freebayes/** gatk4/applybqsr: - modules/gatk4/applybqsr/** diff --git a/tests/modules/freebayes/germline/main.nf b/tests/modules/freebayes/germline/main.nf deleted file mode 100644 index 1b39eea0..00000000 --- a/tests/modules/freebayes/germline/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { FREEBAYES_GERMLINE } from '../../../../modules/freebayes/germline/main.nf' addParams( options: [:] ) - -workflow test_freebayes { - - input = 
[ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)] - fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) - targets = [] - samples = [] - populations = [] - cnv = [] - - FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv) -} - -workflow test_freebayes_bed { - - input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)] - fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) - targets = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) - samples = [] - populations = [] - cnv = [] - - FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv) -} - -workflow test_freebayes_cram { - - input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) - ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - targets = [] - samples = [] - populations = [] - cnv = [] - - FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv) -} diff --git a/tests/modules/freebayes/germline/test.yml 
b/tests/modules/freebayes/germline/test.yml deleted file mode 100644 index 55925b92..00000000 --- a/tests/modules/freebayes/germline/test.yml +++ /dev/null @@ -1,26 +0,0 @@ -- name: freebayes germline test_freebayes - command: nextflow run tests/modules/freebayes/germline -entry test_freebayes -c tests/config/nextflow.config - tags: - - freebayes - - freebayes/germline - files: - - path: output/freebayes/test.vcf.gz - md5sum: 1ec210ad27514c7a4140c924dc66d979 - -- name: freebayes germline test_freebayes_bed - command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_bed -c tests/config/nextflow.config - tags: - - freebayes - - freebayes/germline - files: - - path: output/freebayes/test.vcf.gz - md5sum: e8923cccd5dac196f72d3d3997a60706 - -- name: freebayes germline test_freebayes_cram - command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_cram -c tests/config/nextflow.config - tags: - - freebayes - - freebayes/germline - files: - - path: output/freebayes/test.vcf.gz - md5sum: cb57a3ed154618e3aa4a5272fcfb7521 diff --git a/tests/modules/freebayes/main.nf b/tests/modules/freebayes/main.nf new file mode 100644 index 00000000..c6f5641f --- /dev/null +++ b/tests/modules/freebayes/main.nf @@ -0,0 +1,95 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { FREEBAYES } from '../../../modules/freebayes/main.nf' addParams( options: [:] ) + +workflow test_freebayes { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [], + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = [] + samples = [] + populations = [] + cnv = [] + + FREEBAYES (input, fasta, fai, 
targets, samples, populations, cnv) +} + +workflow test_freebayes_bed { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [], + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + samples = [] + populations = [] + cnv = [] + + FREEBAYES (input, fasta, fai, targets, samples, populations, cnv) +} + +workflow test_freebayes_cram { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], + [] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = [] + samples = [] + populations = [] + cnv = [] + + FREEBAYES (input, fasta, fai, targets, samples, populations, cnv) +} + +workflow test_freebayes_somatic { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + fasta = 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = [] + samples = [] + populations = [] + cnv = [] + + FREEBAYES (input, fasta, fai, targets, samples, populations, cnv) +} + +workflow test_freebayes_somatic_cram_intervals { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + samples = [] + populations = [] + cnv = [] + + FREEBAYES (input, fasta, fai, targets, samples, populations, cnv) +} diff --git a/tests/modules/freebayes/somatic/main.nf b/tests/modules/freebayes/somatic/main.nf deleted file mode 100644 index d26caf34..00000000 --- a/tests/modules/freebayes/somatic/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { FREEBAYES_SOMATIC } from '../../../../modules/freebayes/somatic/main.nf' addParams( options: [:] ) - -workflow test_freebayes { - - input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), - 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) - ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - targets = [] - samples = [] - - FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples) -} - -workflow test_freebayes_intervals { - - input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram_crai'], checkIfExists: true) - ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - targets = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - samples = [] - - FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples) -} diff --git a/tests/modules/freebayes/somatic/test.yml b/tests/modules/freebayes/somatic/test.yml deleted file mode 100644 index 93113e60..00000000 --- a/tests/modules/freebayes/somatic/test.yml +++ /dev/null @@ -1,17 +0,0 @@ -- name: freebayes somatic test_freebayes - command: nextflow run tests/modules/freebayes/somatic -entry test_freebayes -c tests/config/nextflow.config - tags: - - freebayes/somatic - - freebayes - files: - - path: output/freebayes/test.vcf.gz - md5sum: 1c47d02f27ec5918558c8688ce6e7780 - -- name: freebayes somatic test_freebayes_intervals - command: nextflow 
run tests/modules/freebayes/somatic -entry test_freebayes_intervals -c tests/config/nextflow.config - tags: - - freebayes/somatic - - freebayes - files: - - path: output/freebayes/test.vcf.gz - md5sum: 5b8a12666bde63746dcec7afcd3ef789 diff --git a/tests/modules/freebayes/test.yml b/tests/modules/freebayes/test.yml new file mode 100644 index 00000000..a5840609 --- /dev/null +++ b/tests/modules/freebayes/test.yml @@ -0,0 +1,39 @@ +- name: freebayes test_freebayes + command: nextflow run tests/modules/freebayes -entry test_freebayes -c tests/config/nextflow.config + tags: + - freebayes + files: + - path: output/freebayes/test.vcf.gz + md5sum: 04d60a7135768777e0c764daec6519db + +- name: freebayes test_freebayes_bed + command: nextflow run tests/modules/freebayes -entry test_freebayes_bed -c tests/config/nextflow.config + tags: + - freebayes + files: + - path: output/freebayes/test.vcf.gz + md5sum: fc6e228c8ac5508bd83da45eafc2e7b2 + +- name: freebayes test_freebayes_cram + command: nextflow run tests/modules/freebayes -entry test_freebayes_cram -c tests/config/nextflow.config + tags: + - freebayes + files: + - path: output/freebayes/test.vcf.gz + md5sum: 91b8caaa0e396e1ba4f264a83bb67254 + +- name: freebayes test_freebayes_somatic + command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic -c tests/config/nextflow.config + tags: + - freebayes + files: + - path: output/freebayes/test.vcf.gz + md5sum: 40da977199f16d9888e0e0e07e8bebee + +- name: freebayes test_freebayes_somatic_cram_intervals + command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic_cram_intervals -c tests/config/nextflow.config + tags: + - freebayes + files: + - path: output/freebayes/test.vcf.gz + md5sum: dd976880365287d9ad31a606eb4d091f From 11226d9d98a59af9519a871f28df183391c0d300 Mon Sep 17 00:00:00 2001 From: "Maxime U. 
Garcia" Date: Wed, 3 Nov 2021 11:41:12 +0100 Subject: [PATCH 13/15] fix remove md5sum check (#1017) --- tests/modules/freebayes/test.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/modules/freebayes/test.yml b/tests/modules/freebayes/test.yml index a5840609..22fd0e88 100644 --- a/tests/modules/freebayes/test.yml +++ b/tests/modules/freebayes/test.yml @@ -4,7 +4,6 @@ - freebayes files: - path: output/freebayes/test.vcf.gz - md5sum: 04d60a7135768777e0c764daec6519db - name: freebayes test_freebayes_bed command: nextflow run tests/modules/freebayes -entry test_freebayes_bed -c tests/config/nextflow.config @@ -12,7 +11,6 @@ - freebayes files: - path: output/freebayes/test.vcf.gz - md5sum: fc6e228c8ac5508bd83da45eafc2e7b2 - name: freebayes test_freebayes_cram command: nextflow run tests/modules/freebayes -entry test_freebayes_cram -c tests/config/nextflow.config @@ -20,7 +18,6 @@ - freebayes files: - path: output/freebayes/test.vcf.gz - md5sum: 91b8caaa0e396e1ba4f264a83bb67254 - name: freebayes test_freebayes_somatic command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic -c tests/config/nextflow.config @@ -28,7 +25,6 @@ - freebayes files: - path: output/freebayes/test.vcf.gz - md5sum: 40da977199f16d9888e0e0e07e8bebee - name: freebayes test_freebayes_somatic_cram_intervals command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic_cram_intervals -c tests/config/nextflow.config @@ -36,4 +32,3 @@ - freebayes files: - path: output/freebayes/test.vcf.gz - md5sum: dd976880365287d9ad31a606eb4d091f From 08b71fa85f69147f7c56552bfca08045ac43a137 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 3 Nov 2021 17:01:23 +0100 Subject: [PATCH 14/15] New module: `gunc run` (+ `gunc downloaddb`) (#880) * Specify more guidelines on input channels * Linting * Updates based on code review * Update README.md * Fix broken sentence * feat: add megahit module, currently decompressed output * Update main.nf * Update tests/modules/megahit/test.yml Co-authored-by: Maxime Borry * Apply suggestions from code review Co-authored-by: Harshil Patel * feat: compress all outputs, remove md5sums due to gz stochasicity * fix: wrong conda channel for pigz * fix: broken singleend tests and update meta.yml * Missed one * Apply suggestions from code review Co-authored-by: Harshil Patel * fix: pigz formatting * Apply suggestions from code review Co-authored-by: Harshil Patel * Apply suggestions from code review * Add GUNC download_db and run commands * Bump with version without zgrep * Apply suggestions from code review Co-authored-by: Robert A. Petit III * Harshil formatting * Apply suggestions from code review Co-authored-by: Robert A. Petit III Co-authored-by: Harshil Patel Co-authored-by: Maxime Borry Co-authored-by: Robert A. 
Petit III --- modules/gunc/downloaddb/functions.nf | 78 ++++++++++++++++++++++++++ modules/gunc/downloaddb/main.nf | 37 ++++++++++++ modules/gunc/downloaddb/meta.yml | 36 ++++++++++++ modules/gunc/run/functions.nf | 78 ++++++++++++++++++++++++++ modules/gunc/run/main.nf | 45 +++++++++++++++ modules/gunc/run/meta.yml | 53 +++++++++++++++++ tests/config/pytest_modules.yml | 8 +++ tests/modules/gunc/downloaddb/main.nf | 12 ++++ tests/modules/gunc/downloaddb/test.yml | 8 +++ tests/modules/gunc/run/main.nf | 17 ++++++ tests/modules/gunc/run/test.yml | 8 +++ 11 files changed, 380 insertions(+) create mode 100644 modules/gunc/downloaddb/functions.nf create mode 100644 modules/gunc/downloaddb/main.nf create mode 100644 modules/gunc/downloaddb/meta.yml create mode 100644 modules/gunc/run/functions.nf create mode 100644 modules/gunc/run/main.nf create mode 100644 modules/gunc/run/meta.yml create mode 100644 tests/modules/gunc/downloaddb/main.nf create mode 100644 tests/modules/gunc/downloaddb/test.yml create mode 100644 tests/modules/gunc/run/main.nf create mode 100644 tests/modules/gunc/run/test.yml diff --git a/modules/gunc/downloaddb/functions.nf b/modules/gunc/downloaddb/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/gunc/downloaddb/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 
?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/gunc/downloaddb/main.nf b/modules/gunc/downloaddb/main.nf new file mode 100644 index 00000000..af421608 --- /dev/null +++ b/modules/gunc/downloaddb/main.nf @@ -0,0 +1,37 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GUNC_DOWNLOADDB { + tag "$db_name" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "bioconda::gunc=1.0.5" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gunc:1.0.5--pyhdfd78af_0" + } else { + container "quay.io/biocontainers/gunc:1.0.5--pyhdfd78af_0" + } + + input: + val db_name + + output: + path "*.dmnd" , emit: db + path "versions.yml" , emit: versions + + script: + """ + gunc download_db . 
-db $db_name $options.args + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( gunc --version ) + END_VERSIONS + """ +} diff --git a/modules/gunc/downloaddb/meta.yml b/modules/gunc/downloaddb/meta.yml new file mode 100644 index 00000000..cb486da0 --- /dev/null +++ b/modules/gunc/downloaddb/meta.yml @@ -0,0 +1,36 @@ +name: gunc_downloaddb +description: Download database for GUNC detection of Chimerism and Contamination in Prokaryotic Genomes +keywords: + - download + - prokaryote + - assembly + - genome + - quality control + - chimeras +tools: + - gunc: + description: Python package for detection of chimerism and contamination in prokaryotic genomes. + homepage: https://grp-bork.embl-community.io/gunc/ + documentation: https://grp-bork.embl-community.io/gunc/ + tool_dev_url: https://github.com/grp-bork/gunc + doi: "10.1186/s13059-021-02393-0" + licence: ['GNU General Public v3 or later (GPL v3+)'] + +input: + - db_name: + type: string + description: "Which database to download. 
Options: progenomes or gtdb" + pattern: "progenomes|gtdb" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: file + description: GUNC database file + pattern: "*.dmnd" + +authors: + - "@jfy133" diff --git a/modules/gunc/run/functions.nf b/modules/gunc/run/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/gunc/run/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && 
!System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/gunc/run/main.nf b/modules/gunc/run/main.nf new file mode 100644 index 00000000..f873a7df --- /dev/null +++ b/modules/gunc/run/main.nf @@ -0,0 +1,45 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GUNC_RUN { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::gunc=1.0.5" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gunc:1.0.5--pyhdfd78af_0" + } else { + container "quay.io/biocontainers/gunc:1.0.5--pyhdfd78af_0" + } + + input: + tuple val(meta), path(fasta) + path(db) + + output: + tuple val(meta), path("*maxCSS_level.tsv") , emit: maxcss_level_tsv + tuple val(meta), path("*all_levels.tsv") , optional: true, emit: all_levels_tsv + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + gunc \\ + run \\ + --input_fasta $fasta \\ + --db_file $db \\ + --threads $task.cpus \\ + $options.args + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( gunc --version ) + END_VERSIONS + """ +} diff --git a/modules/gunc/run/meta.yml b/modules/gunc/run/meta.yml new file mode 100644 index 00000000..1dd4a8ae --- /dev/null +++ b/modules/gunc/run/meta.yml @@ -0,0 +1,53 @@ +name: gunc_run +description: Detection of Chimerism and Contamination in Prokaryotic Genomes +keywords: + - prokaryote + - assembly + - genome + - quality control + - chimeras +tools: + - gunc: + description: Python package for detection of chimerism and contamination in prokaryotic genomes. + homepage: https://grp-bork.embl-community.io/gunc/ + documentation: https://grp-bork.embl-community.io/gunc/ + tool_dev_url: https://github.com/grp-bork/gunc + doi: "10.1186/s13059-021-02393-0" + licence: ['GNU General Public v3 or later (GPL v3+)'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file containing contig (bins) + pattern: "*.fa" + - db: + type: file + description: GUNC database file + pattern: "*.dmnd" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - maxcss_level_tsv: + type: file + description: Output file with scores for a taxonomic level with the highest CSS score + pattern: "*.tsv" + - all_levels_tsv: + type: file + description: Optional output file with results for each taxonomic level + pattern: "*.tsv" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 047f83ff..8d8f32f3 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -546,6 +546,14 @@ gubbins: - modules/gubbins/** - tests/modules/gubbins/** +gunc/downloaddb: + - modules/gunc/downloaddb/** + - tests/modules/gunc/downloaddb/** + +gunc/run: + - modules/gunc/run/** + - tests/modules/gunc/run/** + gunzip: - modules/gunzip/** - tests/modules/gunzip/** diff --git a/tests/modules/gunc/downloaddb/main.nf b/tests/modules/gunc/downloaddb/main.nf new file mode 100644 index 00000000..c0321279 --- /dev/null +++ b/tests/modules/gunc/downloaddb/main.nf @@ -0,0 +1,12 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GUNC_DOWNLOADDB } from '../../../../modules/gunc/downloaddb/main.nf' addParams( options: [:] ) + +workflow test_gunc_downloaddb { + + input = 'progenomes' + + GUNC_DOWNLOADDB ( input ) +} diff --git a/tests/modules/gunc/downloaddb/test.yml b/tests/modules/gunc/downloaddb/test.yml new file mode 100644 index 00000000..d1aafae7 --- /dev/null +++ b/tests/modules/gunc/downloaddb/test.yml @@ -0,0 +1,8 @@ +- name: gunc downloaddb + command: nextflow run ./tests/modules/gunc/downloaddb -entry test_gunc_downloaddb -c 
tests/config/nextflow.config + tags: + - gunc + - gunc/downloaddb + files: + - path: output/gunc/gunc_db_progenomes2.1.dmnd + md5sum: 447c9330056b02f29f30fe81fe4af4eb diff --git a/tests/modules/gunc/run/main.nf b/tests/modules/gunc/run/main.nf new file mode 100644 index 00000000..a1a191dc --- /dev/null +++ b/tests/modules/gunc/run/main.nf @@ -0,0 +1,17 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GUNC_RUN } from '../../../../modules/gunc/run/main.nf' addParams( options: [:] ) +include { GUNC_DOWNLOADDB } from '../../../../modules/gunc/downloaddb/main.nf' addParams( options: [:] ) + + +workflow test_gunc_run { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] + + GUNC_DOWNLOADDB('progenomes') + + GUNC_RUN ( input, GUNC_DOWNLOADDB.out.db ) +} diff --git a/tests/modules/gunc/run/test.yml b/tests/modules/gunc/run/test.yml new file mode 100644 index 00000000..d527f37e --- /dev/null +++ b/tests/modules/gunc/run/test.yml @@ -0,0 +1,8 @@ +- name: gunc run + command: nextflow run ./tests/modules/gunc/run -entry test_gunc_run -c tests/config/nextflow.config + tags: + - gunc + - gunc/run + files: + - path: output/gunc/GUNC.progenomes_2.1.maxCSS_level.tsv + md5sum: 0420c1a9f2c50fefaee9fab5d80a551a From fc4f3e8822865d85904c1e96e93868dae7247a81 Mon Sep 17 00:00:00 2001 From: "Robert A. 
Petit III" Date: Thu, 4 Nov 2021 01:49:30 -0600 Subject: [PATCH 15/15] add seqsero2 module (#1016) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add seqsero2 module * correct lint errors * Update modules/seqsero2/main.nf Co-authored-by: Sébastien Guizard * set output directory Co-authored-by: Sébastien Guizard --- modules/seqsero2/functions.nf | 78 +++++++++++++++++++++++++++++++++ modules/seqsero2/main.nf | 45 +++++++++++++++++++ modules/seqsero2/meta.yml | 52 ++++++++++++++++++++++ tests/config/pytest_modules.yml | 12 +++-- tests/modules/seqsero2/main.nf | 13 ++++++ tests/modules/seqsero2/test.yml | 11 +++++ 6 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 modules/seqsero2/functions.nf create mode 100644 modules/seqsero2/main.nf create mode 100644 modules/seqsero2/meta.yml create mode 100644 tests/modules/seqsero2/main.nf create mode 100644 tests/modules/seqsero2/test.yml diff --git a/modules/seqsero2/functions.nf b/modules/seqsero2/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/seqsero2/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = 
args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/seqsero2/main.nf b/modules/seqsero2/main.nf new file mode 100644 index 00000000..3748a6e4 --- /dev/null +++ b/modules/seqsero2/main.nf @@ -0,0 +1,45 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SEQSERO2 { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::seqsero2=1.2.1" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/seqsero2:1.2.1--py_0" + } else { + container "quay.io/biocontainers/seqsero2:1.2.1--py_0" + } + + input: + tuple val(meta), path(seqs) + + output: + tuple val(meta), path("results/*_log.txt") , emit: log + tuple val(meta), path("results/*_result.tsv"), emit: tsv + tuple val(meta), path("results/*_result.txt"), emit: txt + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" + """ + SeqSero2_package.py \\ + $options.args \\ + -d results/ \\ + -n $prefix \\ + -p $task.cpus \\ + -i $seqs + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( echo \$( SeqSero2_package.py --version 2>&1) | sed 's/^.*SeqSero2_package.py //' ) + END_VERSIONS + """ +} diff --git a/modules/seqsero2/meta.yml b/modules/seqsero2/meta.yml new file mode 100644 index 00000000..ceea80e3 --- /dev/null +++ b/modules/seqsero2/meta.yml @@ -0,0 +1,52 @@ +name: seqsero2 +description: Salmonella serotype prediction from reads and assemblies +keywords: + - fasta + - fastq + - salmonella + - serotype +tools: + - seqsero2: + description: Salmonella serotype prediction from genome sequencing data + homepage: https://github.com/denglab/SeqSero2 + documentation: https://github.com/denglab/SeqSero2 + tool_dev_url: https://github.com/denglab/SeqSero2 + doi: "10.1128/AEM.01746-19" + licence: ['GPL v2'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - seqs: + type: file + description: FASTQ or FASTA formatted sequences + pattern: "*.{fq.gz,fastq.gz,fna.gz,fna,fasta.gz,fasta,fa.gz,fa}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - log: + type: file + description: A log of serotype antigen results + pattern: "*_log.txt" + - tsv: + type: file + description: Tab-delimited summary of the SeqSero2 results + pattern: "*_result.tsv" + - txt: + type: file + description: Detailed summary of the SeqSero2 results + pattern: "*_result.txt" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 8d8f32f3..008c98dc 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -446,14 +446,14 @@ gatk4/fastqtosam: - modules/gatk4/fastqtosam/** - tests/modules/gatk4/fastqtosam/** -gatk4/genomicsdbimport: - - modules/gatk4/genomicsdbimport/** - - tests/modules/gatk4/genomicsdbimport/** - gatk4/filtermutectcalls: - modules/gatk4/filtermutectcalls/** - tests/modules/gatk4/filtermutectcalls/** +gatk4/genomicsdbimport: + - modules/gatk4/genomicsdbimport/** + - tests/modules/gatk4/genomicsdbimport/** + gatk4/getpileupsummaries: - modules/gatk4/getpileupsummaries/** - tests/modules/gatk4/getpileupsummaries/** @@ -1083,6 +1083,10 @@ seqkit/split2: - modules/seqkit/split2/** - tests/modules/seqkit/split2/** +seqsero2: + - modules/seqsero2/** + - tests/modules/seqsero2/** + seqtk/mergepe: - modules/seqtk/mergepe/** - tests/modules/seqtk/mergepe/** diff --git a/tests/modules/seqsero2/main.nf b/tests/modules/seqsero2/main.nf new file mode 100644 index 00000000..04ee8e27 --- /dev/null +++ b/tests/modules/seqsero2/main.nf @@ -0,0 +1,13 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SEQSERO2 } from '../../../modules/seqsero2/main.nf' addParams( options: [args: '-m k -t 4'] ) + +workflow test_seqsero2 { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + + SEQSERO2 ( input ) +} diff --git 
a/tests/modules/seqsero2/test.yml b/tests/modules/seqsero2/test.yml new file mode 100644 index 00000000..2aa49686 --- /dev/null +++ b/tests/modules/seqsero2/test.yml @@ -0,0 +1,11 @@ +- name: seqsero2 test_seqsero2 + command: nextflow run tests/modules/seqsero2 -entry test_seqsero2 -c tests/config/nextflow.config + tags: + - seqsero2 + files: + - path: output/seqsero2/results/SeqSero_log.txt + md5sum: d00242dfa734b5abb3622a6048f0b4fb + - path: output/seqsero2/results/SeqSero_result.tsv + contains: ['Sample', 'Predicted', 'Note'] + - path: output/seqsero2/results/SeqSero_result.txt + contains: ['Sample', 'Predicted', 'Note']