From 4e308c131e29bc2cca3d46ba8f7e59e9d3785198 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 14 Jun 2022 13:39:52 +0200 Subject: [PATCH] FastK/fastK (#1739) * Add FastK/FastK * Add ktab and prof outputs * Add the hidden files necessary for the optional files * Update image * Apply suggestions from code review Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- modules/fastk/fastk/main.nf | 38 +++++++++++++++++ modules/fastk/fastk/meta.yml | 52 +++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/fastk/fastk/main.nf | 28 ++++++++++++ tests/modules/fastk/fastk/nextflow.config | 6 +++ tests/modules/fastk/fastk/test.yml | 39 +++++++++++++++++ 6 files changed, 167 insertions(+) create mode 100644 modules/fastk/fastk/main.nf create mode 100644 modules/fastk/fastk/meta.yml create mode 100644 tests/modules/fastk/fastk/main.nf create mode 100644 tests/modules/fastk/fastk/nextflow.config create mode 100644 tests/modules/fastk/fastk/test.yml diff --git a/modules/fastk/fastk/main.nf b/modules/fastk/fastk/main.nf new file mode 100644 index 00000000..a9981114 --- /dev/null +++ b/modules/fastk/fastk/main.nf @@ -0,0 +1,38 @@ +process FASTK_FASTK { + tag "$meta.id" + label 'process_medium' + + if (params.enable_conda) { + error "Conda environments cannot be used when using the FastK tool. Please use docker or singularity containers." 
+ } + container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.0' + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.hist") , emit: hist + tuple val(meta), path("*.ktab*", hidden: true), emit: ktab, optional: true + tuple val(meta), path("*.prof*", hidden: true), emit: prof, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' + """ + FastK \\ + $args \\ + -T$task.cpus \\ + -N${prefix}_fk \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + END_VERSIONS + """ +} diff --git a/modules/fastk/fastk/meta.yml b/modules/fastk/fastk/meta.yml new file mode 100644 index 00000000..83b476a3 --- /dev/null +++ b/modules/fastk/fastk/meta.yml @@ -0,0 +1,52 @@ +name: "fastk_fastk" +description: A fast K-mer counter for high-fidelity shotgun datasets +keywords: + - k-mer + - count + - histogram +tools: + - "fastk": + description: "A fast K-mer counter for high-fidelity shotgun datasets" + homepage: "https://github.com/thegenemyers/FASTK" + documentation: "" + tool_dev_url: "https://github.com/thegenemyers/FASTK" + doi: "" + licence: "https://github.com/thegenemyers/FASTK/blob/master/LICENSE" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - hist: + type: file + description: Histogram of k-mers + pattern: "*.hist" + - ktab: + type: file + description: A sorted table of all canonical k-mers along with their counts. + pattern: "*.ktab" + - prof: + type: file + description: A k-mer count profile of each sequence in the input data set. + pattern: "*.prof" + +authors: + - "@mahesh-panchal" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 7999940e..c136b58c 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -675,6 +675,10 @@ fastani: - modules/fastani/** - tests/modules/fastani/** +fastk/fastk: + - modules/fastk/fastk/** + - tests/modules/fastk/fastk/** + fastp: - modules/fastp/** - tests/modules/fastp/** diff --git a/tests/modules/fastk/fastk/main.nf b/tests/modules/fastk/fastk/main.nf new file mode 100644 index 00000000..944d434f --- /dev/null +++ b/tests/modules/fastk/fastk/main.nf @@ -0,0 +1,28 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { FASTK_FASTK } from '../../../../modules/fastk/fastk/main.nf' + +workflow test_fastk_fastk_single_end { + + input = [ + [ id:'test' , single_end: true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + + FASTK_FASTK ( input ) +} + +workflow test_fastk_fastk_paired_end { + + input = [ + [ id:'test' , single_end: false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + + FASTK_FASTK ( input ) +} diff --git a/tests/modules/fastk/fastk/nextflow.config b/tests/modules/fastk/fastk/nextflow.config new file mode 100644 index 00000000..14d50725 --- /dev/null +++ b/tests/modules/fastk/fastk/nextflow.config @@ -0,0 +1,6 @@ +process { + + 
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.args = '-t -p' + +} diff --git a/tests/modules/fastk/fastk/test.yml b/tests/modules/fastk/fastk/test.yml new file mode 100644 index 00000000..be522a34 --- /dev/null +++ b/tests/modules/fastk/fastk/test.yml @@ -0,0 +1,39 @@ +- name: fastk fastk test_fastk_fastk_single_end + command: nextflow run ./tests/modules/fastk/fastk -entry test_fastk_fastk_single_end -c ./tests/config/nextflow.config -c ./tests/modules/fastk/fastk/nextflow.config + tags: + - fastk + - fastk/fastk + files: + - path: output/fastk/.test_fk.ktab.1 + md5sum: ceeacd0cb3aa69bf9b2a402830b40e26 + - path: output/fastk/.test_fk.ktab.2 + md5sum: f2629fd15b285aed3dc2d5fe546edf3f + - path: output/fastk/.test_fk.prof.1 + md5sum: ebd48923a724cf79934f0b2ed42ba73d + - path: output/fastk/test_fk.hist + md5sum: c80e12f7321e62dba4b437d7bff36ec0 + - path: output/fastk/test_fk.ktab + md5sum: a605a58931a4b5029469e1c2575c8cee + - path: output/fastk/test_fk.prof + md5sum: 43d426c95d277b8148406624d513bd40 + +- name: fastk fastk test_fastk_fastk_paired_end + command: nextflow run ./tests/modules/fastk/fastk -entry test_fastk_fastk_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/fastk/fastk/nextflow.config + tags: + - fastk + - fastk/fastk + files: + - path: output/fastk/.test_fk.ktab.1 + md5sum: 7f28fb44940fda799797e3069f5d7263 + - path: output/fastk/.test_fk.ktab.2 + md5sum: c14a85c128926ace78372f09029977b1 + - path: output/fastk/.test_fk.prof.1 + md5sum: 46a5fd9e297262b058f8c1fd062fcf56 + - path: output/fastk/.test_fk.prof.2 + md5sum: 80326a7406f41ccf2e51e341fc804132 + - path: output/fastk/test_fk.hist + md5sum: 4f75b550d87ed4f26a2b10a05ac7e98c + - path: output/fastk/test_fk.ktab + md5sum: fddd5be0c36ad1d2131b8d8774f7657a + - path: output/fastk/test_fk.prof + md5sum: d3c7d8decd4ea6e298291b8be0e2de85