add module for mash dist (#1193)

* add module for mash dist

* fix lint

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
This commit is contained in:
Robert A. Petit III 2022-01-04 11:29:49 -05:00 committed by GitHub
parent e3285528ac
commit bf5edb8614
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 114 additions and 0 deletions

34
modules/mash/dist/main.nf vendored Normal file
View file

@ -0,0 +1,34 @@
// Runs `mash dist` to estimate distances between a reference and one
// sample's query sequences, writing the result table to <prefix>.txt.
//
// Inputs:
//   meta      - Groovy map with sample information; meta.id tags the task and
//               is the default output prefix.
//   query     - query file, passed together with meta as a tuple.
//   reference - reference file, passed as a separate input.
// Outputs:
//   dist      - tuple of meta and the file(s) matching *.txt.
//   versions  - versions.yml recording what `mash --version` reports.
process MASH_DIST {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::mash=2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1' :
        'quay.io/biocontainers/mash:2.3--he348c14_1' }"

    input:
    tuple val(meta), path(query)
    path reference

    output:
    tuple val(meta), path("*.txt"), emit: dist
    path "versions.yml"           , emit: versions

    script:
    // Extra command-line options and the output prefix can be overridden via task.ext.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // `mash:` is indented below the process-name key so versions.yml is a
    // nested mapping (process name -> tool -> version), not two sibling keys.
    """
    mash \\
        dist \\
        -p $task.cpus \\
        $args \\
        $reference \\
        $query > ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mash: \$(mash --version 2>&1)
    END_VERSIONS
    """
}

45
modules/mash/dist/meta.yml vendored Normal file
View file

@ -0,0 +1,45 @@
# Module metadata for MASH_DIST (modules/mash/dist/main.nf).
name: mash_dist
description: Calculate Mash distances between reference and query sequences
keywords:
  - mash/dist
tools:
  - mash:
      description: Fast sequence distance estimator that uses MinHash
      homepage: https://github.com/marbl/Mash
      documentation: https://mash.readthedocs.io/en/latest/sketches.html
      tool_dev_url: https://github.com/marbl/Mash
      doi: "10.1186/s13059-016-0997-x"
      licence: ['https://github.com/marbl/Mash/blob/master/LICENSE.txt']
# The process receives `meta` and `query` together as one tuple and
# `reference` as a separate path input.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reference:
      type: file
      description: FASTA, FASTQ or Mash sketch
      pattern: "*.{fasta,fasta.gz,fastq,fastq.gz,msh}"
  - query:
      type: file
      description: FASTA, FASTQ or Mash sketch
      pattern: "*.{fasta,fasta.gz,fastq,fastq.gz,msh}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - dist:
      type: file
      description: The results from mash dist
      pattern: "*.txt"
authors:
  - "@rpetit3"

View file

@ -900,6 +900,10 @@ mapdamage2:
  - modules/mapdamage2/**
  - tests/modules/mapdamage2/**

mash/dist:
  - modules/mash/dist/**
  - tests/modules/mash/dist/**

mash/sketch:
  - modules/mash/sketch/**
  - tests/modules/mash/sketch/**

16
tests/modules/mash/dist/main.nf vendored Normal file
View file

@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { MASH_DIST } from '../../../../modules/mash/dist/main.nf'
workflow test_mash_dist {

    // Reference side of the comparison: the sarscov2 genome FASTA from the test data map.
    reference_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Query side: [ meta map, contigs FASTA ], matching the tuple input of MASH_DIST.
    query_tuple = [
        [ id:'test', single_end:false ],
        file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
    ]

    MASH_DIST ( query_tuple, reference_fasta )
}

View file

@ -0,0 +1,5 @@
// Default settings applied to the processes run with this config.
process {
    // Publish results under <params.outdir>/<tool>, where <tool> is derived
    // from the process name: take the last ':'-separated component, keep the
    // part before the first '_', and lower-case it
    // (e.g. a process named MASH_DIST publishes into "<outdir>/mash").
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

10
tests/modules/mash/dist/test.yml vendored Normal file
View file

@ -0,0 +1,10 @@
# Test case for the mash/dist module: runs the test_mash_dist entry workflow
# and checks the listed output files against their md5 checksums.
# NOTE(review): format looks like a pytest-workflow spec; confirm the runner.
- name: mash dist test_mash_dist
  command: nextflow run tests/modules/mash/dist -entry test_mash_dist -c tests/config/nextflow.config
  tags:
    - mash
    - mash/dist
  files:
    # Distance table; the file name comes from the 'test' id in the test meta map.
    - path: output/mash/test.txt
      md5sum: 1dfd5b53e1996f9fdb9d260b3e81c4d6
    # Software versions file emitted by the process.
    - path: output/mash/versions.yml
      md5sum: f786d48a9af59e823d19c171969d0d50