From 67481c6d543c8d84940376cb3b258354bb66f988 Mon Sep 17 00:00:00 2001
From: Mahesh Binzer-Panchal
Date: Fri, 6 May 2022 15:04:04 +0000
Subject: [PATCH 1/5] Start Meryl count

---
 modules/meryl/count/main.nf               | 45 ++++++++++++++++++++
 modules/meryl/count/meta.yml              | 51 +++++++++++++++++++++++
 tests/config/pytest_modules.yml           |  4 ++
 tests/modules/meryl/count/main.nf         | 15 +++++++
 tests/modules/meryl/count/nextflow.config |  7 ++++
 tests/modules/meryl/count/test.yml        |  8 ++++
 6 files changed, 130 insertions(+)
 create mode 100644 modules/meryl/count/main.nf
 create mode 100644 modules/meryl/count/meta.yml
 create mode 100644 tests/modules/meryl/count/main.nf
 create mode 100644 tests/modules/meryl/count/nextflow.config
 create mode 100644 tests/modules/meryl/count/test.yml

diff --git a/modules/meryl/count/main.nf b/modules/meryl/count/main.nf
new file mode 100644
index 00000000..80491242
--- /dev/null
+++ b/modules/meryl/count/main.nf
@@ -0,0 +1,45 @@
+process MERYL_COUNT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::meryl=1.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/meryl:1.3--h87f3376_1':
+        'quay.io/biocontainers/meryl:1.3--h87f3376_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.meryl"), emit: meryl
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    for READ in $reads; do
+        meryl count \\
+            threads=$task.cpus \\
+            $args \\
+            $reads \\
+            output read.\${READ%.f*}.meryl
+    done
+    meryl union-sum \\
+        threads=$task.cpus \\
+        $args2 \\
+        output ${prefix}.meryl
+
+    # clean up
+    rm -rf read.*.meryl
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        meryl: \$( meryl --version |& sed 's/meryl //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/meryl/count/meta.yml b/modules/meryl/count/meta.yml
new file mode 100644
index 00000000..885267ec
--- /dev/null
+++ b/modules/meryl/count/meta.yml
@@ -0,0 +1,51 @@
+name: "meryl_count"
+## TODO nf-core: Add a description of the module and list keywords
+description: write your description here
+keywords:
+  - sort
+tools:
+  - "meryl":
+      ## TODO nf-core: Add a description and other details for the software below
+      description: ""
+      homepage: "{}"
+      documentation: "{}"
+      tool_dev_url: "{}"
+      doi: ""
+      licence: "['GPL']"
+
+## TODO nf-core: Add a description of all of the variables used as input
+input:
+  # Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  #
+  ## TODO nf-core: Delete / customise this example input
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+
+## TODO nf-core: Add a description of all of the variables used as output
+output:
+  #Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  #
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  ## TODO nf-core: Delete / customise this example output
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+
+authors:
+  - "@mahesh-panchal"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 62816c50..82c2de61 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1214,6 +1214,10 @@ meningotype:
   - modules/meningotype/**
   - tests/modules/meningotype/**
 
+meryl/count:
+  - modules/meryl/count/**
+  - tests/modules/meryl/count/**
+
 metabat2/jgisummarizebamcontigdepths:
   - modules/metabat2/jgisummarizebamcontigdepths/**
   - tests/modules/metabat2/jgisummarizebamcontigdepths/**
diff --git a/tests/modules/meryl/count/main.nf b/tests/modules/meryl/count/main.nf
new file mode 100644
index 00000000..8706046a
--- /dev/null
+++ b/tests/modules/meryl/count/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { MERYL_COUNT } from '../../../../modules/meryl/count/main.nf'
+
+workflow test_meryl_count_single_end {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+    ]
+
+    MERYL_COUNT ( input )
+}
diff --git a/tests/modules/meryl/count/nextflow.config b/tests/modules/meryl/count/nextflow.config
new file mode 100644
index 00000000..99e1f846
--- /dev/null
+++ b/tests/modules/meryl/count/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    ext.args = 'k=21'
+    ext.args2 = 'k=21'
+
+}
diff --git a/tests/modules/meryl/count/test.yml b/tests/modules/meryl/count/test.yml
new file mode 100644
index 00000000..0565bd1a
--- /dev/null
+++ b/tests/modules/meryl/count/test.yml
@@ -0,0 +1,8 @@
+- name: meryl count test_meryl_count_single_end
+  command: nextflow run tests/modules/meryl/count -entry test_meryl_count_single_end -c tests/config/nextflow.config
+  tags:
+    - meryl/count
+    - meryl
+  files:
+    - path: output/meryl/versions.yml
+      md5sum: 5fe537d873925ccbcc4edf0983e9eda0

From 4591ea2205d305d6bf27511571eb98ff7e4b9052 Mon Sep 17 00:00:00 2001
From: Mahesh Binzer-Panchal
Date: Mon, 9 May 2022 07:58:58 +0000
Subject: [PATCH 2/5] atomize

Drop the union-sum step so the module only counts k-mers, and pass the
loop variable (\$READ) to `meryl count` instead of the whole $reads list,
so that each read.<name>.meryldb is built from its own input file only.
---
 Review note (ignored by git am): this patch was edited by hand after
 export to change `$reads` to `\$READ` inside the for loop. The blob ids
 on the "index" line of modules/meryl/count/main.nf were not regenerated.

 modules/meryl/count/main.nf               | 16 +++--------
 modules/meryl/count/meta.yml              | 37 +++++++++--------------
 tests/modules/meryl/count/main.nf         |  2 +-
 tests/modules/meryl/count/nextflow.config |  1 -
 4 files changed, 19 insertions(+), 37 deletions(-)

diff --git a/modules/meryl/count/main.nf b/modules/meryl/count/main.nf
index 80491242..f7a2811d 100644
--- a/modules/meryl/count/main.nf
+++ b/modules/meryl/count/main.nf
@@ -11,15 +11,14 @@ process MERYL_COUNT {
     tuple val(meta), path(reads)
 
     output:
-    tuple val(meta), path("*.meryl"), emit: meryl
-    path "versions.yml"             , emit: versions
+    tuple val(meta), path("*.meryldb"), emit: meryl_db
+    path "versions.yml"               , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
-    def args2 = task.ext.args2 ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
     for READ in $reads; do
@@ -27,15 +26,8 @@ process MERYL_COUNT {
             threads=$task.cpus \\
             $args \\
-            $reads \\
+            \$READ \\
-            output read.\${READ%.f*}.meryl
+            output read.\${READ%.f*}.meryldb
     done
-    meryl union-sum \\
-        threads=$task.cpus \\
-        $args2 \\
-        output ${prefix}.meryl
-
-    # clean up
-    rm -rf read.*.meryl
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/meryl/count/meta.yml b/modules/meryl/count/meta.yml
index 885267ec..dd59decd 100644
--- a/modules/meryl/count/meta.yml
+++ b/modules/meryl/count/meta.yml
@@ -1,51 +1,42 @@
 name: "meryl_count"
-## TODO nf-core: Add a description of the module and list keywords
-description: write your description here
+description: A genomic k-mer counter (and sequence utility) with nice features.
 keywords:
-  - sort
+  - k-mer
+  - count
 tools:
   - "meryl":
-      ## TODO nf-core: Add a description and other details for the software below
-      description: ""
-      homepage: "{}"
-      documentation: "{}"
-      tool_dev_url: "{}"
+      description: "A genomic k-mer counter (and sequence utility) with nice features. "
+      homepage: "https://github.com/marbl/meryl"
+      documentation: "https://meryl.readthedocs.io/en/latest/quick-start.html"
+      tool_dev_url: "https://github.com/marbl/meryl"
       doi: ""
       licence: "['GPL']"
 
-## TODO nf-core: Add a description of all of the variables used as input
 input:
-  # Only when we have meta
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  #
-  ## TODO nf-core: Delete / customise this example input
-  - bam:
+  - reads:
       type: file
-      description: BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
 
-## TODO nf-core: Add a description of all of the variables used as output
 output:
-  #Only when we have meta
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  #
   - versions:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-  ## TODO nf-core: Delete / customise this example output
-  - bam:
-      type: file
-      description: Sorted BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
+  - meryl_db:
+      type: directory
+      description: A Meryl k-mer database
 
 authors:
   - "@mahesh-panchal"
diff --git a/tests/modules/meryl/count/main.nf b/tests/modules/meryl/count/main.nf
index 8706046a..7d65b7dd 100644
--- a/tests/modules/meryl/count/main.nf
+++ b/tests/modules/meryl/count/main.nf
@@ -4,7 +4,7 @@ nextflow.enable.dsl = 2
 
 include { MERYL_COUNT } from '../../../../modules/meryl/count/main.nf'
 
-workflow test_meryl_count_single_end {
+workflow test_meryl_count {
 
     input = [
         [ id:'test' ], // meta map
diff --git a/tests/modules/meryl/count/nextflow.config b/tests/modules/meryl/count/nextflow.config
index 99e1f846..6d899c50 100644
--- a/tests/modules/meryl/count/nextflow.config
+++ b/tests/modules/meryl/count/nextflow.config
@@ -2,6 +2,5 @@ process {
 
     publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
     ext.args = 'k=21'
-    ext.args2 = 'k=21'
 
 }

From 9ebeb6a702650f88906ce8d0cbf98bed158832a8 Mon Sep 17 00:00:00 2001
From: Mahesh Binzer-Panchal
Date: Mon, 9 May 2022 08:05:07 +0000
Subject: [PATCH 3/5] Update test

---
 tests/modules/meryl/count/test.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/modules/meryl/count/test.yml b/tests/modules/meryl/count/test.yml
index 0565bd1a..5437966c 100644
--- a/tests/modules/meryl/count/test.yml
+++ b/tests/modules/meryl/count/test.yml
@@ -1,8 +1,8 @@
-- name: meryl count test_meryl_count_single_end
-  command: nextflow run tests/modules/meryl/count -entry test_meryl_count_single_end -c tests/config/nextflow.config
+- name: meryl count test_meryl_count
+  command: nextflow run tests/modules/meryl/count -entry test_meryl_count -c tests/config/nextflow.config
   tags:
-    - meryl/count
     - meryl
+    - meryl/count
   files:
     - path: output/meryl/versions.yml
-      md5sum: 5fe537d873925ccbcc4edf0983e9eda0
+      md5sum: 9bc9470c2eff996026781d5ff8c2b369

From a43bc940d6aaeaa755b34484cffeb5b41bce92eb Mon Sep 17 00:00:00 2001
From: Mahesh Binzer-Panchal
Date: Mon, 9 May 2022 11:15:14 +0000
Subject: [PATCH 4/5] Add suggestions from code review

---
 modules/meryl/count/meta.yml       |  1 +
 tests/modules/meryl/count/main.nf  | 17 +++++++++++++++--
 tests/modules/meryl/count/test.yml | 17 +++++++++++++----
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/modules/meryl/count/meta.yml b/modules/meryl/count/meta.yml
index dd59decd..854f8759 100644
--- a/modules/meryl/count/meta.yml
+++ b/modules/meryl/count/meta.yml
@@ -37,6 +37,7 @@ output:
   - meryl_db:
       type: directory
       description: A Meryl k-mer database
+      pattern: "*.meryldb"
 
 authors:
   - "@mahesh-panchal"
diff --git a/tests/modules/meryl/count/main.nf b/tests/modules/meryl/count/main.nf
index 7d65b7dd..3c9a64c7 100644
--- a/tests/modules/meryl/count/main.nf
+++ b/tests/modules/meryl/count/main.nf
@@ -4,12 +4,25 @@ nextflow.enable.dsl = 2
 
 include { MERYL_COUNT } from '../../../../modules/meryl/count/main.nf'
 
-workflow test_meryl_count {
+workflow test_meryl_count_single_end {
 
     input = [
-        [ id:'test' ], // meta map
+        [ id:'test' , single_end: true ], // meta map
         file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
     ]
 
     MERYL_COUNT ( input )
 }
+
+workflow test_meryl_count_paired_end {
+
+    input = [
+        [ id:'test' , single_end: false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+
+    MERYL_COUNT ( input )
+}
diff --git a/tests/modules/meryl/count/test.yml b/tests/modules/meryl/count/test.yml
index 5437966c..6291a62f 100644
--- a/tests/modules/meryl/count/test.yml
+++ b/tests/modules/meryl/count/test.yml
@@ -1,8 +1,17 @@
-- name: meryl count test_meryl_count
-  command: nextflow run tests/modules/meryl/count -entry test_meryl_count -c tests/config/nextflow.config
+- name: meryl count test_meryl_count_single_end
+  command: nextflow run tests/modules/meryl/count -entry test_meryl_count_single_end -c tests/config/nextflow.config
   tags:
-    - meryl
     - meryl/count
+    - meryl
   files:
     - path: output/meryl/versions.yml
-      md5sum: 9bc9470c2eff996026781d5ff8c2b369
+      md5sum: 5fe537d873925ccbcc4edf0983e9eda0
+
+- name: meryl count test_meryl_count_paired_end
+  command: nextflow run tests/modules/meryl/count -entry test_meryl_count_paired_end -c tests/config/nextflow.config
+  tags:
+    - meryl/count
+    - meryl
+  files:
+    - path: output/meryl/versions.yml
+      md5sum: 4961f13cfb60ba8764ed666e70dbf12c

From 453d5b66a47b0a1e3e06c254a01699703eced3a6 Mon Sep 17 00:00:00 2001
From: Jasmin F <73216762+jasmezz@users.noreply.github.com>
Date: Mon, 9 May 2022 14:46:18 +0200
Subject: [PATCH 5/5] Add Bacteroides fragilis GFF file

---
 tests/config/test_data.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index 4ff6efd9..c4f470a4 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -345,6 +345,7 @@ params {
                 genome_gbff_gz = "${test_data_dir}/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gbff.gz"
                 genome_paf = "${test_data_dir}/genomics/prokaryotes/bacteroides_fragilis/genome/genome.paf"
                 genome_mapping_potential_arg = "${test_data_dir}/genomics/prokaryotes/bacteroides_fragilis/genome/genome.mapping.potential.ARG"
+                genome_gff_gz = "${test_data_dir}/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gff.gz"
             }
             'illumina' {