Add khmer unique-kmers.py module (#1838)

This commit is contained in:
Jose Espinosa-Carrasco 2022-06-30 17:23:30 +02:00 committed by GitHub
parent 44baf85bcc
commit 82fdff4fb4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 111 additions and 0 deletions

View file

@ -0,0 +1,38 @@
// Runs khmer's unique-kmers.py on a FASTA file for a given k-mer size.
//
// Inputs:
//   fasta     - sequence file handed to unique-kmers.py as its positional argument
//   kmer_size - value passed to the -k option
//
// Outputs:
//   report       - report.txt, written by unique-kmers.py through its -R option
//   kmers        - kmers.txt, the value extracted from the "number..." line of the report
//   versions     - versions.yml, khmer version keyed under the process name
process KHMER_UNIQUEKMERS {
    tag "$fasta"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2' :
        'quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2' }"

    input:
    path fasta
    val  kmer_size

    output:
    path "report.txt"  , emit: report
    path "kmers.txt"   , emit: kmers
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options can be injected through task.ext.args.
    def args = task.ext.args ?: ''
    // The grep/sed step keeps the report line(s) starting with "number" and
    // strips the label up to the colon plus the whitespace after it, leaving
    // only the value in kmers.txt.
    // NOTE(review): presumably this is the "number of unique k-mers" line of
    // the khmer report - confirm against the unique-kmers.py output format.
    // In the heredoc, `khmer:` is indented so that it nests under the process
    // name in versions.yml rather than becoming a second top-level key.
    """
    unique-kmers.py \\
        -k $kmer_size \\
        -R report.txt \\
        $args \\
        $fasta

    grep ^number report.txt | sed 's/^.*:.[[:blank:]]//g' > kmers.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        khmer: \$( unique-kmers.py --version 2>&1 | grep ^khmer | sed 's/^khmer //;s/ .*\$//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,42 @@
# Metadata for the KHMER_UNIQUEKMERS module: tool provenance plus the
# inputs and outputs declared by the process.
name: "khmer_uniquekmers"
description: Estimates the number of unique k-mers in a FASTA file with the khmer unique-kmers.py script
keywords:
  - khmer
  - k-mer
  - effective genome size
tools:
  - "khmer":
      description: khmer k-mer counting library
      homepage: https://github.com/dib-lab/khmer
      documentation: https://khmer.readthedocs.io/en/latest/
      tool_dev_url: https://github.com/dib-lab/khmer
      doi: "10.12688/f1000research.6924.1"
      licence: ["BSD License"]
input:
  - fasta:
      type: file
      description: fasta file
      pattern: "*.{fa,fasta}"
  - kmer_size:
      type: value
      description: k-mer size to use
      pattern: "[0-9]+"
output:
  - report:
      type: file
      description: Text file containing unique-kmers.py execution report
      pattern: "report.txt"
  - kmers:
      type: file
      description: Text file containing number of kmers
      pattern: "kmers.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@JoseEspinosa"

View file

@ -1177,6 +1177,10 @@ khmer/normalizebymedian:
  - modules/khmer/normalizebymedian/**
  - tests/modules/khmer/normalizebymedian/**
# Path globs registered under the khmer/uniquekmers tag: the module sources
# and the matching test directory.
khmer/uniquekmers:
  - modules/khmer/uniquekmers/**
  - tests/modules/khmer/uniquekmers/**
kleborate:
  - modules/kleborate/**
  - tests/modules/kleborate/**

View file

@ -0,0 +1,12 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { KHMER_UNIQUEKMERS } from '../../../../modules/khmer/uniquekmers/main.nf'
// Test entry point: runs KHMER_UNIQUEKMERS once on a single genome FASTA
// with a fixed k-mer size.
workflow test_khmer_uniquekmers {

    // Genome FASTA looked up under the 'sarscov2' key of params.test_data;
    // checkIfExists makes a missing file fail immediately.
    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Value passed as the module's kmer_size input.
    kmer_size = 50

    KHMER_UNIQUEKMERS ( genome_fasta, kmer_size )
}

View file

@ -0,0 +1,5 @@
// Publish every process's outputs under <outdir>/<tool>, where <tool> is the
// lower-cased first '_'-separated token of the unqualified process name.
process {

    publishDir = {
        // Drop any enclosing workflow scopes ("A:B:PROC" -> "PROC").
        def unqualified_name = task.process.tokenize(':')[-1]
        // Keep only the part before the first underscore, lower-cased.
        def tool_dir = unqualified_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,10 @@
# pytest-workflow case for the khmer/uniquekmers module: runs the test
# workflow and checks the published outputs.
- name: khmer uniquekmers test_khmer_uniquekmers
  command: nextflow run ./tests/modules/khmer/uniquekmers -entry test_khmer_uniquekmers -c ./tests/config/nextflow.config -c ./tests/modules/khmer/uniquekmers/nextflow.config
  tags:
    - khmer
    - khmer/uniquekmers
  files:
    - path: output/khmer/kmers.txt
      md5sum: 496ebf23653a01c7a42d743e47c19f65
    - path: output/khmer/report.txt
      md5sum: ee489abd3b244dea3640649e1790d55e
    # Existence check only: no md5sum is pinned for the versions file.
    - path: output/khmer/versions.yml