From b67556e29f1f155550ce9ae9c967cbb559f6f9f1 Mon Sep 17 00:00:00 2001
From: Mahesh Binzer-Panchal
Date: Wed, 29 Jun 2022 18:04:36 +0200
Subject: [PATCH] Add FastK/Merge (#1828)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add FastK/Merge

* Update modules/fastk/merge/main.nf

* Update modules/fastk/merge/meta.yml

Co-authored-by: Sébastien Guizard
Co-authored-by: Sébastien Guizard
---
 modules/fastk/fastk/main.nf               |  8 +--
 modules/fastk/merge/main.nf               | 39 +++++++++++++++
 modules/fastk/merge/meta.yml              | 60 +++++++++++++++++++++++
 tests/config/pytest_modules.yml           |  4 ++
 tests/modules/fastk/fastk/test.yml        |  6 +++
 tests/modules/fastk/merge/main.nf         | 58 ++++++++++++++++++++++
 tests/modules/fastk/merge/nextflow.config | 14 ++++++
 tests/modules/fastk/merge/test.yml        | 39 +++++++++++++++
 8 files changed, 224 insertions(+), 4 deletions(-)
 create mode 100644 modules/fastk/merge/main.nf
 create mode 100644 modules/fastk/merge/meta.yml
 create mode 100644 tests/modules/fastk/merge/main.nf
 create mode 100644 tests/modules/fastk/merge/nextflow.config
 create mode 100644 tests/modules/fastk/merge/test.yml

diff --git a/modules/fastk/fastk/main.nf b/modules/fastk/fastk/main.nf
index 148a8168..1e3e5d07 100644
--- a/modules/fastk/fastk/main.nf
+++ b/modules/fastk/fastk/main.nf
@@ -12,10 +12,10 @@ process FASTK_FASTK {
     tuple val(meta), path(reads)
 
     output:
-    tuple val(meta), path("*.hist")               , emit: hist
-    tuple val(meta), path("*.ktab*", hidden: true), emit: ktab, optional: true
-    tuple val(meta), path("*.prof*", hidden: true), emit: prof, optional: true
-    path "versions.yml"                           , emit: versions
+    tuple val(meta), path("*.hist")                      , emit: hist
+    tuple val(meta), path("*.ktab*", hidden: true)       , emit: ktab, optional: true
+    tuple val(meta), path("*.{prof,pidx}*", hidden: true), emit: prof, optional: true
+    path "versions.yml"                                  , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/fastk/merge/main.nf b/modules/fastk/merge/main.nf
new file mode 100644
index 00000000..ef21b7b3
--- /dev/null
+++ b/modules/fastk/merge/main.nf
@@ -0,0 +1,39 @@
+process FASTK_MERGE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    if (params.enable_conda) {
+        error "Conda environments cannot be used when using the FastK tool. Please use docker or singularity containers."
+    }
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.0'
+
+    input:
+    tuple val(meta), path(hist), path(ktab), path(prof)
+
+    output:
+    tuple val(meta), path("*.hist")                      , emit: hist
+    tuple val(meta), path("*.ktab*", hidden: true)       , emit: ktab, optional: true
+    tuple val(meta), path("*.{prof,pidx}*", hidden: true), emit: prof, optional: true
+    path "versions.yml"                                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    Fastmerge \\
+        $args \\
+        -T$task.cpus \\
+        ${prefix} \\
+        $hist
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastk: $FASTK_VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/fastk/merge/meta.yml b/modules/fastk/merge/meta.yml
new file mode 100644
index 00000000..b9fe8dec
--- /dev/null
+++ b/modules/fastk/merge/meta.yml
@@ -0,0 +1,60 @@
+name: "fastk_merge"
+description: A tool to merge FastK histograms
+keywords:
+  - merge
+  - k-mer
+  - histogram
+  - fastk
+tools:
+  - fastk:
+      description: "A fast K-mer counter for high-fidelity shotgun datasets"
+      homepage: "https://github.com/thegenemyers/FASTK"
+      documentation: ""
+      tool_dev_url: "https://github.com/thegenemyers/FASTK"
+      doi: ""
+      licence: "https://github.com/thegenemyers/FASTK/blob/master/LICENSE"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - hist:
+      type: file
+      description: Histogram files from the program FastK
+      pattern: "*.hist"
+  - ktab:
+      type: file
+      description: Histogram ktab files from the program FastK (option -t)
+      pattern: "*.ktab*"
+  - prof:
+      type: file
+      description: Histogram profile files from the program FastK (option -p)
+      pattern: "*.{prof,pidx}*"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - hist:
+      type: file
+      description: Histogram files from the program FastK
+      pattern: "*.hist"
+  - ktab:
+      type: file
+      description: Histogram ktab files from the program FastK (option -t)
+      pattern: "*.ktab*"
+  - prof:
+      type: file
+      description: Histogram profile files from the program FastK (option -p)
+      pattern: "*.{prof,pidx}*"
+
+authors:
+  - "@mahesh-panchal"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index a6a37bb9..de4eb424 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -695,6 +695,10 @@ fastk/histex:
   - modules/fastk/histex/**
   - tests/modules/fastk/histex/**
 
+fastk/merge:
+  - modules/fastk/merge/**
+  - tests/modules/fastk/merge/**
+
 fastp:
   - modules/fastp/**
   - tests/modules/fastp/**
diff --git a/tests/modules/fastk/fastk/test.yml b/tests/modules/fastk/fastk/test.yml
index be522a34..fd3fdebe 100644
--- a/tests/modules/fastk/fastk/test.yml
+++ b/tests/modules/fastk/fastk/test.yml
@@ -8,6 +8,8 @@
       md5sum: ceeacd0cb3aa69bf9b2a402830b40e26
     - path: output/fastk/.test_fk.ktab.2
       md5sum: f2629fd15b285aed3dc2d5fe546edf3f
+    - path: output/fastk/.test_fk.pidx.1
+      md5sum: 90bc384f61d2ecdb4586ab52ab04fddf
     - path: output/fastk/.test_fk.prof.1
       md5sum: ebd48923a724cf79934f0b2ed42ba73d
     - path: output/fastk/test_fk.hist
@@ -27,6 +29,10 @@
       md5sum: 7f28fb44940fda799797e3069f5d7263
     - path: output/fastk/.test_fk.ktab.2
       md5sum: c14a85c128926ace78372f09029977b1
+    - path: output/fastk/.test_fk.pidx.1
+      md5sum: e7e760f714070a4afefb38ffff559684
+    - path: output/fastk/.test_fk.pidx.2
+      md5sum: a549612bbdba2506eb3311237638c4b0
     - path: output/fastk/.test_fk.prof.1
       md5sum: 46a5fd9e297262b058f8c1fd062fcf56
     - path: output/fastk/.test_fk.prof.2
diff --git a/tests/modules/fastk/merge/main.nf b/tests/modules/fastk/merge/main.nf
new file mode 100644
index 00000000..1bed019e
--- /dev/null
+++ b/tests/modules/fastk/merge/main.nf
@@ -0,0 +1,58 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FASTK_FASTK } from '../../../../modules/fastk/fastk/main.nf'
+include { FASTK_MERGE } from '../../../../modules/fastk/merge/main.nf'
+
+workflow test_fastk_merge_hist_only {
+
+    input1 = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+    input2 = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+
+    FASTK_FASTK ( Channel.of( input1, input2 ) )
+    FASTK_MERGE (
+        FASTK_FASTK.out.hist.groupTuple()
+            .join( FASTK_FASTK.out.ktab.groupTuple(), remainder: true )
+            .join( FASTK_FASTK.out.prof.groupTuple(), remainder: true )
+            .map { meta, hist, ktab, prof -> [meta, hist, ktab ? ktab.flatten() : [] , prof ? prof.flatten() : [] ] }
+    )
+}
+
+workflow test_fastk_merge_all_files {
+
+    input1 = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+    input2 = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+
+    FASTK_FASTK ( Channel.of( input1, input2 ) )
+    FASTK_MERGE (
+        FASTK_FASTK.out.hist.groupTuple()
+            .join( FASTK_FASTK.out.ktab.groupTuple(), remainder: true )
+            .join( FASTK_FASTK.out.prof.groupTuple(), remainder: true )
+            .map { meta, hist, ktab, prof -> [meta, hist, ktab ? ktab.flatten() : [] , prof ? prof.flatten() : [] ] }
+    )
+}
diff --git a/tests/modules/fastk/merge/nextflow.config b/tests/modules/fastk/merge/nextflow.config
new file mode 100644
index 00000000..57fd2c73
--- /dev/null
+++ b/tests/modules/fastk/merge/nextflow.config
@@ -0,0 +1,14 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: 'test_fastk_merge_all_files:FASTK_FASTK' {
+        ext.args = '-t1 -p'
+    }
+    withName: 'FASTK_FASTK' {
+        ext.args = '-t1'
+        ext.prefix = { "${meta.id}_${task.index}" }
+        publishDir = [ enabled: false ]
+    }
+
+}
diff --git a/tests/modules/fastk/merge/test.yml b/tests/modules/fastk/merge/test.yml
new file mode 100644
index 00000000..2c3fba2e
--- /dev/null
+++ b/tests/modules/fastk/merge/test.yml
@@ -0,0 +1,39 @@
+- name: fastk merge test_fastk_merge_hist_only
+  command: nextflow run ./tests/modules/fastk/merge -entry test_fastk_merge_hist_only -c ./tests/config/nextflow.config -c ./tests/modules/fastk/merge/nextflow.config
+  tags:
+    - fastk/merge
+    - fastk
+  files:
+    - path: output/fastk/.test.ktab.1
+      md5sum: dff1e9d326aea87778645235cfa3380f
+    - path: output/fastk/.test.ktab.2
+      md5sum: 438e7807dcc2eb8120e1338838147600
+    - path: output/fastk/test.hist
+      md5sum: 75d41eb0e3f8af5456711a95966e2b00
+    - path: output/fastk/test.ktab
+      md5sum: 7d882f4e3542df1a2f5cb0858b12dc03
+
+- name: fastk merge test_fastk_merge_all_files
+  command: nextflow run ./tests/modules/fastk/merge -entry test_fastk_merge_all_files -c ./tests/config/nextflow.config -c ./tests/modules/fastk/merge/nextflow.config
+  tags:
+    - fastk/merge
+    - fastk
+  files:
+    - path: output/fastk/.test.ktab.1
+      md5sum: dff1e9d326aea87778645235cfa3380f
+    - path: output/fastk/.test.ktab.2
+      md5sum: 438e7807dcc2eb8120e1338838147600
+    - path: output/fastk/.test.pidx.1
+      md5sum: 7553344778f6571192f04c7e659b967c
+    - path: output/fastk/.test.pidx.2
+      md5sum: 09bb65fb10db58d1fd5bb0a123fccfc6
+    - path: output/fastk/.test.prof.1
+      md5sum: 7f0f84a62c89063ec18c9b0e447afe93
+    - path: output/fastk/.test.prof.2
+      md5sum: 7f0f84a62c89063ec18c9b0e447afe93
+    - path: output/fastk/test.hist
+      md5sum: 75d41eb0e3f8af5456711a95966e2b00
+    - path: output/fastk/test.ktab
+      md5sum: 7d882f4e3542df1a2f5cb0858b12dc03
+    - path: output/fastk/test.prof
+      md5sum: d3c7d8decd4ea6e298291b8be0e2de85