feat: add sourmash sketch (#1287)

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Moritz E. Beber 2022-02-15 12:30:28 +01:00 committed by GitHub
parent e745e167c1
commit 7560689375
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 111 additions and 0 deletions

View file

@ -0,0 +1,35 @@
// Create a sourmash signature (hash sketch) from the sequence file(s) of one sample.
//
// Input : tuple of the sample meta map and the sequence file(s) to sketch.
// Output: `signatures` - tuple of the meta map and the '*.sig' file(s) produced
//         `versions`   - versions.yml recording the sourmash version that was run
process SOURMASH_SKETCH {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::sourmash=4.2.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sourmash:4.2.4--hdfd78af_0':
        'quay.io/biocontainers/sourmash:4.2.4--hdfd78af_0' }"

    input:
    tuple val(meta), path(sequence)

    output:
    tuple val(meta), path("*.sig"), emit: signatures
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Fallback sub-command and sketch parameters, used only when task.ext.args is not set
    def args   = task.ext.args   ?: "dna --param-string 'scaled=1000,k=31'"
    // The prefix is used both as the --merge value and as the output file stem
    def prefix = task.ext.prefix ?: "${meta.id}"
    // NOTE: the extra indentation of the `sourmash:` line inside the heredoc is significant:
    // it nests the version entry under the process-name key in versions.yml.
    """
    sourmash sketch \\
        $args \\
        --merge '${prefix}' \\
        --output '${prefix}.sig' \\
        $sequence

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sourmash: \$(echo \$(sourmash --version 2>&1) | sed 's/^sourmash //' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,42 @@
name: sourmash_sketch
description: Create a signature (a hash sketch) of a sequence using sourmash
keywords:
  - hash sketch
  - signature

# Software wrapped by this module
tools:
  - sourmash:
      description: Compute and compare MinHash signatures for DNA data sets.
      homepage: https://sourmash.readthedocs.io/
      documentation: https://sourmash.readthedocs.io/
      tool_dev_url: https://github.com/dib-lab/sourmash
      doi: "10.1186/s13059-016-0997-x"
      licence: ['BSD-3-clause']

# Input channel elements
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - sequence:
      type: file
      description: FastA file containing (genomic) sequence data
      pattern: "*.{fna,fa,fasta}"

# Output channel elements
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - signatures:
      type: file
      description: MinHash signature of the given sequence
      # plain glob: a brace group with a single alternative adds nothing
      pattern: "*.sig"

authors:
  - "@Midnighter"

View file

@ -1417,6 +1417,10 @@ snpsites:
  - modules/snpsites/**
  - tests/modules/snpsites/**
# Path globs listed for the sourmash/sketch module: its module code and its tests
sourmash/sketch:
  - modules/sourmash/sketch/**
  - tests/modules/sourmash/sketch/**
# Path globs listed for the spades module: its module code and its tests
spades:
  - modules/spades/**
  - tests/modules/spades/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SOURMASH_SKETCH } from '../../../../modules/sourmash/sketch/main.nf'
// Test entry workflow: runs SOURMASH_SKETCH once on the SARS-CoV-2 genome FASTA
// taken from params.test_data.
workflow test_sourmash_sketch {

    // meta map describing the sample
    meta  = [ id:'test', single_end:false ]
    // checkIfExists is passed so that file() verifies the path is present
    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    SOURMASH_SKETCH ( [ meta, fasta ] )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into "<outdir>/<tool>", where <tool> is derived from the process name:
    // take the last ':'-separated component, keep the part before the first '_',
    // and lower-case it.
    publishDir = {
        "${params.outdir}/${task.process.tokenize(':').last().tokenize('_').first().toLowerCase()}"
    }

}

View file

@ -0,0 +1,10 @@
# Test case: run the test_sourmash_sketch entry workflow and check the listed
# output files against their expected md5 checksums.
- name: sourmash sketch test_sourmash_sketch
  command: nextflow run tests/modules/sourmash/sketch -entry test_sourmash_sketch -c tests/config/nextflow.config
  tags:
    - sourmash
    - sourmash/sketch
  files:
    - path: output/sourmash/test.sig
      md5sum: 4b0dee307e35fc670cd0d416321e4961
    - path: output/sourmash/versions.yml
      md5sum: ec70dabc3e4b3bf11d39c17a99ca4bc6