From 2926ec0abb192e29888a0b040ec776e99826891f Mon Sep 17 00:00:00 2001
From: Mahesh Binzer-Panchal
Date: Fri, 20 May 2022 13:23:11 +0000
Subject: [PATCH] Add mash screen

---
Reviewer notes (this section sits between the separator and the diff, so it
is not part of the commit message):

* modules/mash/screen/main.nf adds process MASH_SCREEN. It runs
  `mash screen $args -p $task.cpus $query_sketch $fastx_db` and redirects
  stdout to `${prefix}.screen`. `args` comes from task.ext.args (default
  empty) and `prefix` from task.ext.prefix (default meta.id). Outputs are the
  `*.screen` file (emit: screen) and versions.yml (emit: versions).
* tests/modules/mash/screen/main.nf sketches the sarscov2 paired-end reads
  with MASH_SKETCH and screens that sketch against the sarscov2 genome FASTA.
* meta.yml: the `screen` output description now says the listed sequences
  come from query_sketch. Per the Mash documentation, `mash screen` reports
  query IDs, i.e. entries of the sketch, scored against the pool (fastx_db).
* tests/modules/mash/screen/nextflow.config now ends with a newline.
* NOTE(review): the abbreviated blob ids on the `index` lines of meta.yml and
  nextflow.config were not regenerated after the two touch-ups above, and
  indentation was restored following nf-core convention. Presumably harmless
  for new text files, but confirm with `git apply --check`.

 modules/mash/screen/main.nf               | 38 ++++++++++++++++++
 modules/mash/screen/meta.yml              | 47 +++++++++++++++++++++++
 tests/config/pytest_modules.yml           |  4 ++
 tests/modules/mash/screen/main.nf         | 21 ++++++++++
 tests/modules/mash/screen/nextflow.config |  5 +++
 tests/modules/mash/screen/test.yml        | 12 ++++++
 6 files changed, 127 insertions(+)
 create mode 100644 modules/mash/screen/main.nf
 create mode 100644 modules/mash/screen/meta.yml
 create mode 100644 tests/modules/mash/screen/main.nf
 create mode 100644 tests/modules/mash/screen/nextflow.config
 create mode 100644 tests/modules/mash/screen/test.yml

diff --git a/modules/mash/screen/main.nf b/modules/mash/screen/main.nf
new file mode 100644
index 00000000..3e7859ed
--- /dev/null
+++ b/modules/mash/screen/main.nf
@@ -0,0 +1,38 @@
+process MASH_SCREEN {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::mash=2.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1':
+        'quay.io/biocontainers/mash:2.3--he348c14_1' }"
+
+    input:
+    tuple val(meta), path(query_sketch)
+    path fastx_db
+
+    output:
+    tuple val(meta), path("*.screen"), emit: screen
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mash \\
+        screen \\
+        $args \\
+        -p $task.cpus \\
+        $query_sketch \\
+        $fastx_db \\
+        > ${prefix}.screen
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mash: \$( mash --version )
+    END_VERSIONS
+    """
+}
diff --git a/modules/mash/screen/meta.yml b/modules/mash/screen/meta.yml
new file mode 100644
index 00000000..81e455bc
--- /dev/null
+++ b/modules/mash/screen/meta.yml
@@ -0,0 +1,47 @@
+name: "mash_screen"
+description: Screens query sequences against large sequence databases
+keywords:
+  - screen
+  - containment
+  - contamination
+  - taxonomic assignment
+tools:
+  - "mash":
+      description: Fast sequence distance estimator that uses MinHash
+      homepage: https://github.com/marbl/Mash
+      documentation: https://mash.readthedocs.io/en/latest/sketches.html
+      tool_dev_url: https://github.com/marbl/Mash
+      doi: "10.1186/s13059-016-0997-x"
+      licence: ["https://github.com/marbl/Mash/blob/master/LICENSE.txt"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - query_sketch:
+      type: file
+      description: MinHash sketch of query sequences
+      pattern: "*.msh"
+  - fastx_db:
+      type: file
+      description: Sequence files to match against
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - screen:
+      type: file
+      description: List of sequences from query_sketch found in fastx_db, with their containment identity
+      pattern: "*.screen"
+
+authors:
+  - "@mahesh-panchal"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 16411798..c1aeb0c6 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1214,6 +1214,10 @@ mash/dist:
   - modules/mash/dist/**
   - tests/modules/mash/dist/**
 
+mash/screen:
+  - modules/mash/screen/**
+  - tests/modules/mash/screen/**
+
 mash/sketch:
   - modules/mash/sketch/**
   - tests/modules/mash/sketch/**
diff --git a/tests/modules/mash/screen/main.nf b/tests/modules/mash/screen/main.nf
new file mode 100644
index 00000000..7f0b8bd9
--- /dev/null
+++ b/tests/modules/mash/screen/main.nf
@@ -0,0 +1,21 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { MASH_SKETCH } from '../../../../modules/mash/sketch/main.nf'
+include { MASH_SCREEN } from '../../../../modules/mash/screen/main.nf'
+
+workflow test_mash_screen {
+
+    input = [
+        [ id:'test', single_end:false], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+    fastx_db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+
+    MASH_SKETCH ( input )
+    MASH_SCREEN ( MASH_SKETCH.out.mash, fastx_db )
+}
diff --git a/tests/modules/mash/screen/nextflow.config b/tests/modules/mash/screen/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/mash/screen/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/mash/screen/test.yml b/tests/modules/mash/screen/test.yml
new file mode 100644
index 00000000..f622817a
--- /dev/null
+++ b/tests/modules/mash/screen/test.yml
@@ -0,0 +1,12 @@
+- name: mash screen test_mash_screen
+  command: nextflow run ./tests/modules/mash/screen -entry test_mash_screen -c ./tests/config/nextflow.config -c ./tests/modules/mash/screen/nextflow.config
+  tags:
+    - mash
+    - mash/screen
+  files:
+    - path: output/mash/test.mash_stats
+      md5sum: 2a6f297d8e69a5e4160243bc6c89129c
+    - path: output/mash/test.msh
+      md5sum: d747145a43dad5f82342036f8f5d9133
+    - path: output/mash/test.screen
+      md5sum: d3c871dccd5cd57ab54781fa5c5d7278