Add mash screen

This commit is contained in:
Mahesh Binzer-Panchal 2022-05-20 13:23:11 +00:00
parent c8ccfe3710
commit 2926ec0abb
6 changed files with 127 additions and 0 deletions

View file

@ -0,0 +1,38 @@
// Runs `mash screen` on a Mash sketch and a set of sequence files.
//
// Inputs:
//   meta         - sample metadata map; meta.id is used for the task tag and
//                  as the default output prefix
//   query_sketch - Mash sketch file, passed as the first positional argument
//   fastx_db     - sequence file(s), passed as the second positional argument
// Outputs:
//   screen       - tuple of meta and the "<prefix>.screen" file holding the
//                  standard output of `mash screen`
//   versions     - versions.yml recording the mash version used
process MASH_SCREEN {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::mash=2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1':
        'quay.io/biocontainers/mash:2.3--he348c14_1' }"

    input:
    tuple val(meta), path(query_sketch)
    path fastx_db

    output:
    tuple val(meta), path("*.screen"), emit: screen
    path "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output prefix can be overridden via task.ext.
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The `mash:` entry must be indented beneath the process-name key so that
    // versions.yml is a nested mapping of process -> tool -> version.
    """
    mash \\
        screen \\
        $args \\
        -p $task.cpus \\
        $query_sketch \\
        $fastx_db \\
        > ${prefix}.screen

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mash: \$( mash --version )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,47 @@
# Module metadata for the MASH_SCREEN process.
name: 'mash_screen'
description: Screens query sequences against large sequence databases
keywords:
  - screen
  - containment
  - contamination
  - taxonomic assignment

# Underlying software wrapped by this module.
tools:
  - mash:
      description: Fast sequence distance estimator that uses MinHash
      homepage: https://github.com/marbl/Mash
      documentation: https://mash.readthedocs.io/en/latest/sketches.html
      tool_dev_url: https://github.com/marbl/Mash
      doi: '10.1186/s13059-016-0997-x'
      licence:
        - 'https://github.com/marbl/Mash/blob/master/LICENSE.txt'

# Input channels, listed in the order the process declares them.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - query_sketch:
      type: file
      description: MinHash sketch of query sequences
      pattern: '*.msh'
  - fastx_db:
      type: file
      description: Sequence files to match against

# Output channels emitted by the process.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: 'versions.yml'
  - screen:
      type: file
      description: List of sequences from fastx_db similar to query sequences
      pattern: '*.screen'

authors:
  - '@mahesh-panchal'

View file

@ -1214,6 +1214,10 @@ mash/dist:
- modules/mash/dist/**
- tests/modules/mash/dist/**
# Path globs registered under the mash/screen key: the module sources and its tests.
mash/screen:
- modules/mash/screen/**
- tests/modules/mash/screen/**
mash/sketch:
- modules/mash/sketch/**
- tests/modules/mash/sketch/**

View file

@ -0,0 +1,21 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { MASH_SKETCH } from '../../../../modules/mash/sketch/main.nf'
include { MASH_SCREEN } from '../../../../modules/mash/screen/main.nf'
// Test entry point: builds a sketch from the two sarscov2 Illumina FASTQ files
// with MASH_SKETCH, then passes that sketch and the sarscov2 genome FASTA to
// MASH_SCREEN.
workflow test_mash_screen {

    sarscov2 = params.test_data['sarscov2']

    // Sample metadata and the pair of read files fed to MASH_SKETCH.
    meta  = [ id:'test', single_end:false ]
    reads = [
        file(sarscov2['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(sarscov2['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    // Sequence file given to MASH_SCREEN as its second input.
    genome_fasta = file(sarscov2['genome']['genome_fasta'], checkIfExists: true)

    MASH_SKETCH ( [ meta, reads ] )
    MASH_SCREEN ( MASH_SKETCH.out.mash, genome_fasta )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish each task's outputs under <outdir>/<tool>, where <tool> is the
    // lower-cased text before the first underscore of the process's simple
    // name (the last ':'-separated component of task.process).
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,12 @@
# Runs the test_mash_screen workflow entry and checks the listed output files
# against their recorded md5 checksums.
- name: mash screen test_mash_screen
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/mash/screen
    -entry test_mash_screen
    -c ./tests/config/nextflow.config
    -c ./tests/modules/mash/screen/nextflow.config
  tags:
    - mash
    - mash/screen
  files:
    - path: output/mash/test.mash_stats
      md5sum: 2a6f297d8e69a5e4160243bc6c89129c
    - path: output/mash/test.msh
      md5sum: d747145a43dad5f82342036f8f5d9133
    - path: output/mash/test.screen
      md5sum: d3c871dccd5cd57ab54781fa5c5d7278