FastK/fastK (#1739)

* Add FastK/FastK

* Add ktab and prof outputs

* Add the hidden files necessary for the optional files

* Update image

* Apply suggestions from code review

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
This commit is contained in:
Mahesh Binzer-Panchal 2022-06-14 13:39:52 +02:00 committed by GitHub
parent 8b2a473f58
commit 4e308c131e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 167 additions and 0 deletions

View file

@ -0,0 +1,38 @@
// Runs FastK on one sample's reads. Always emits the k-mer histogram; the
// k-mer table and sequence profiles are optional outputs that only exist when
// the corresponding FastK options are supplied through task.ext.args.
process FASTK_FASTK {
    tag "$meta.id"
    label 'process_medium'

    // Only a container image is declared for this module, so a Conda run is
    // rejected up front instead of failing later with a missing executable.
    if (params.enable_conda) {
        error "Conda environments cannot be used when using the FastK tool. Please use docker or singularity containers."
    }
    container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.0'

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.hist")                      , emit: hist
    // FastK keeps the bulk of the table/profile data in hidden dot-files next
    // to the visible stub file, hence `hidden: true` and the trailing wildcard.
    tuple val(meta), path("*.ktab*", hidden: true)       , emit: ktab, optional: true
    // Profiles are split over hidden `.prof.N` data files and `.pidx.N` index
    // files; both are captured so the emitted profile set is usable downstream.
    tuple val(meta), path("*.{prof,pidx}*", hidden: true), emit: prof, optional: true
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is hard-coded rather than queried from the tool.
    // NOTE(review): presumably the FastK commit built into the container - update together with the image tag.
    def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0'
    """
    FastK \\
        $args \\
        -T$task.cpus \\
        -N${prefix}_fk \\
        $reads

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fastk: $FASTK_VERSION
    END_VERSIONS
    """
}

View file

@ -0,0 +1,52 @@
# Module metadata for FASTK_FASTK. The output patterns below mirror the globs
# declared in the process, which also capture FastK's hidden part files.
name: "fastk_fastk"
description: A fast K-mer counter for high-fidelity shotgun datasets
keywords:
  - k-mer
  - count
  - histogram
tools:
  - "fastk":
      description: "A fast K-mer counter for high-fidelity shotgun datasets"
      homepage: "https://github.com/thegenemyers/FASTK"
      documentation: ""
      tool_dev_url: "https://github.com/thegenemyers/FASTK"
      doi: ""
      licence: "https://github.com/thegenemyers/FASTK/blob/master/LICENSE"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - hist:
      type: file
      description: Histogram of k-mers
      pattern: "*.hist"
  - ktab:
      type: file
      description: |
        A sorted table of all canonical k-mers along with their counts.
        Optional output. Consists of the visible <prefix>.ktab file together
        with its hidden part files (e.g. .<prefix>.ktab.1).
      pattern: "*.ktab*"
  - prof:
      type: file
      description: |
        A k-mer count profile of each sequence in the input data set.
        Optional output. Consists of the visible <prefix>.prof file together
        with its hidden part files (e.g. .<prefix>.prof.1).
      pattern: "*.prof*"

authors:
  - "@mahesh-panchal"

View file

@ -675,6 +675,10 @@ fastani:
- modules/fastani/**
- tests/modules/fastani/**
# Path globs associated with the fastk/fastk tag: the module sources and its tests.
# NOTE(review): presumably used by CI to decide which module tests to run - confirm against the consumer.
fastk/fastk:
- modules/fastk/fastk/**
- tests/modules/fastk/fastk/**
fastp:
- modules/fastp/**
- tests/modules/fastp/**

View file

@ -0,0 +1,28 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FASTK_FASTK } from '../../../../modules/fastk/fastk/main.nf'
// Runs FASTK_FASTK on a single FastQ file from the sarscov2 Illumina test data.
workflow test_fastk_fastk_single_end {

    // meta map
    sample_meta  = [ id:'test', single_end:true ]
    sample_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // The process takes one [ meta, reads ] tuple.
    FASTK_FASTK ( [ sample_meta, sample_reads ] )
}
// Runs FASTK_FASTK on a pair of FastQ files from the sarscov2 Illumina test data.
workflow test_fastk_fastk_paired_end {

    // meta map
    sample_meta   = [ id:'test', single_end:false ]
    forward_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    reverse_reads = file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)

    // The process takes one [ meta, [ read1, read2 ] ] tuple.
    FASTK_FASTK ( [ sample_meta, [ forward_reads, reverse_reads ] ] )
}

View file

@ -0,0 +1,6 @@
// Process-scope settings applied when running the FastK module tests.
process {

    // Publish into <outdir>/<tool>: take the last ':'-separated part of the
    // process name, then the part before the first '_', in lower case.
    publishDir = { [ params.outdir, task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() ].join('/') }

    // Extra FastK command-line options passed through task.ext.args.
    // NOTE(review): presumably -t and -p switch on the k-mer table and profile outputs the tests check for - confirm in the FastK docs.
    ext.args = '-t -p'
}

View file

@ -0,0 +1,39 @@
# Single-end run: expected published files and their checksums.
- name: 'fastk fastk test_fastk_fastk_single_end'
  command: >-
    nextflow run ./tests/modules/fastk/fastk
    -entry test_fastk_fastk_single_end
    -c ./tests/config/nextflow.config
    -c ./tests/modules/fastk/fastk/nextflow.config
  tags:
    - 'fastk'
    - 'fastk/fastk'
  files:
    # Hidden part files
    - path: 'output/fastk/.test_fk.ktab.1'
      md5sum: 'ceeacd0cb3aa69bf9b2a402830b40e26'
    - path: 'output/fastk/.test_fk.ktab.2'
      md5sum: 'f2629fd15b285aed3dc2d5fe546edf3f'
    - path: 'output/fastk/.test_fk.prof.1'
      md5sum: 'ebd48923a724cf79934f0b2ed42ba73d'
    # Visible files
    - path: 'output/fastk/test_fk.hist'
      md5sum: 'c80e12f7321e62dba4b437d7bff36ec0'
    - path: 'output/fastk/test_fk.ktab'
      md5sum: 'a605a58931a4b5029469e1c2575c8cee'
    - path: 'output/fastk/test_fk.prof'
      md5sum: '43d426c95d277b8148406624d513bd40'
# Paired-end run: expected published files and their checksums.
- name: 'fastk fastk test_fastk_fastk_paired_end'
  command: >-
    nextflow run ./tests/modules/fastk/fastk
    -entry test_fastk_fastk_paired_end
    -c ./tests/config/nextflow.config
    -c ./tests/modules/fastk/fastk/nextflow.config
  tags:
    - 'fastk'
    - 'fastk/fastk'
  files:
    # Hidden part files
    - path: 'output/fastk/.test_fk.ktab.1'
      md5sum: '7f28fb44940fda799797e3069f5d7263'
    - path: 'output/fastk/.test_fk.ktab.2'
      md5sum: 'c14a85c128926ace78372f09029977b1'
    - path: 'output/fastk/.test_fk.prof.1'
      md5sum: '46a5fd9e297262b058f8c1fd062fcf56'
    - path: 'output/fastk/.test_fk.prof.2'
      md5sum: '80326a7406f41ccf2e51e341fc804132'
    # Visible files
    - path: 'output/fastk/test_fk.hist'
      md5sum: '4f75b550d87ed4f26a2b10a05ac7e98c'
    - path: 'output/fastk/test_fk.ktab'
      md5sum: 'fddd5be0c36ad1d2131b8d8774f7657a'
    - path: 'output/fastk/test_fk.prof'
      md5sum: 'd3c7d8decd4ea6e298291b8be0e2de85'