From bf5edb86148d881965c62eafef08c09a5a4a62ba Mon Sep 17 00:00:00 2001
From: "Robert A. Petit III"
Date: Tue, 4 Jan 2022 11:29:49 -0500
Subject: [PATCH] add module for mash dist (#1193)

* add module for mash dist

* fix lint

Co-authored-by: Gregor Sturm
---
 modules/mash/dist/main.nf               | 36 ++++++++++++++++++++
 modules/mash/dist/meta.yml              | 45 +++++++++++++++++++++++++
 tests/config/pytest_modules.yml         |  4 +++
 tests/modules/mash/dist/main.nf         | 16 +++++++++
 tests/modules/mash/dist/nextflow.config |  5 +++
 tests/modules/mash/dist/test.yml        | 10 ++++++
 6 files changed, 116 insertions(+)
 create mode 100644 modules/mash/dist/main.nf
 create mode 100644 modules/mash/dist/meta.yml
 create mode 100644 tests/modules/mash/dist/main.nf
 create mode 100644 tests/modules/mash/dist/nextflow.config
 create mode 100644 tests/modules/mash/dist/test.yml

diff --git a/modules/mash/dist/main.nf b/modules/mash/dist/main.nf
new file mode 100644
index 00000000..323bd339
--- /dev/null
+++ b/modules/mash/dist/main.nf
@@ -0,0 +1,36 @@
+// Runs `mash dist` on a reference and a query (FASTA, FASTQ or Mash sketch).
+// The table that `mash dist` prints to stdout is saved as <prefix>.txt.
+process MASH_DIST {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::mash=2.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1' :
+        'quay.io/biocontainers/mash:2.3--he348c14_1' }"
+
+    input:
+    tuple val(meta), path(query)
+    path reference
+
+    output:
+    tuple val(meta), path("*.txt"), emit: dist
+    path "versions.yml"           , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mash \\
+        dist \\
+        -p $task.cpus \\
+        $args \\
+        $reference \\
+        $query > ${prefix}.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mash: \$(mash --version 2>&1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/mash/dist/meta.yml b/modules/mash/dist/meta.yml
new file mode 100644
index 00000000..8cbaa63c
--- /dev/null
+++ b/modules/mash/dist/meta.yml
@@ -0,0 +1,45 @@
+name: mash_dist
+description: Calculate Mash distances between reference and query sequences
+keywords:
+  - mash/dist
+tools:
+  - mash:
+      description: Fast sequence distance estimator that uses MinHash
+      homepage: https://github.com/marbl/Mash
+      documentation: https://mash.readthedocs.io/en/latest/distances.html
+      tool_dev_url: https://github.com/marbl/Mash
+      doi: "10.1186/s13059-016-0997-x"
+      licence: ['https://github.com/marbl/Mash/blob/master/LICENSE.txt']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reference:
+      type: file
+      description: FASTA, FASTQ or Mash sketch
+      pattern: "*.{fasta,fasta.gz,fastq,fastq.gz,msh}"
+  - query:
+      type: file
+      description: FASTA, FASTQ or Mash sketch
+      pattern: "*.{fasta,fasta.gz,fastq,fastq.gz,msh}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - dist:
+      type: file
+      description: The results from mash dist
+      pattern: "*.txt"
+
+authors:
+  - "@rpetit3"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 5088d85a..a5b9b47e 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -900,6 +900,10 @@ mapdamage2:
   - modules/mapdamage2/**
   - tests/modules/mapdamage2/**
 
+mash/dist:
+  - modules/mash/dist/**
+  - tests/modules/mash/dist/**
+
 mash/sketch:
   - modules/mash/sketch/**
   - tests/modules/mash/sketch/**
diff --git a/tests/modules/mash/dist/main.nf b/tests/modules/mash/dist/main.nf
new file mode 100644
index 00000000..10c9c9fa
--- /dev/null
+++ b/tests/modules/mash/dist/main.nf
@@ -0,0 +1,16 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { MASH_DIST } from '../../../../modules/mash/dist/main.nf'
+
+workflow test_mash_dist {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
+    ]
+
+    reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    MASH_DIST ( input, reference )
+}
diff --git a/tests/modules/mash/dist/nextflow.config b/tests/modules/mash/dist/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/mash/dist/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/mash/dist/test.yml b/tests/modules/mash/dist/test.yml
new file mode 100644
index 00000000..2602f87d
--- /dev/null
+++ b/tests/modules/mash/dist/test.yml
@@ -0,0 +1,10 @@
+- name: mash dist test_mash_dist
+  command: nextflow run tests/modules/mash/dist -entry test_mash_dist -c tests/config/nextflow.config
+  tags:
+    - mash
+    - mash/dist
+  files:
+    - path: output/mash/test.txt
+      md5sum: 1dfd5b53e1996f9fdb9d260b3e81c4d6
+    - path: output/mash/versions.yml
+      md5sum: f786d48a9af59e823d19c171969d0d50