Add FastK/Merge (#1828)

* Add FastK/Merge

* Update modules/fastk/merge/main.nf

* Update modules/fastk/merge/meta.yml

Co-authored-by: Sébastien Guizard <sguizard@ed.ac.uk>

Co-authored-by: Sébastien Guizard <sguizard@ed.ac.uk>
This commit is contained in:
Mahesh Binzer-Panchal 2022-06-29 18:04:36 +02:00 committed by GitHub
parent 80746895e2
commit b67556e29f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 224 additions and 4 deletions

View file

@ -12,10 +12,10 @@ process FASTK_FASTK {
tuple val(meta), path(reads)
output:
tuple val(meta), path("*.hist") , emit: hist
tuple val(meta), path("*.ktab*", hidden: true), emit: ktab, optional: true
tuple val(meta), path("*.prof*", hidden: true), emit: prof, optional: true
path "versions.yml" , emit: versions
tuple val(meta), path("*.hist") , emit: hist
tuple val(meta), path("*.ktab*", hidden: true) , emit: ktab, optional: true
tuple val(meta), path("*.{prof,pidx}*", hidden: true), emit: prof, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when

View file

@ -0,0 +1,39 @@
// Merges FastK outputs from several runs into a single set of files named after `prefix`
// by calling `Fastmerge`.
//
// Input : [ meta, hist, ktab, prof ] - meta map plus the staged FastK files.
// Output: the files matching *.hist, plus optional (hidden) *.ktab* and *.{prof,pidx}* files,
//         and versions.yml.
process FASTK_MERGE {
tag "$meta.id"
label 'process_medium'
// Abort when Conda is enabled: the module only declares a container (below), no Conda environment.
if (params.enable_conda) {
error "Conda environments cannot be used when using the FastK tool. Please use docker or singularity containers."
}
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.0'
input:
// ktab and prof are staged into the task directory but are never named on the command line below.
// NOTE(review): presumably Fastmerge finds them next to the .hist files via their shared root names - confirm against the FastK docs.
tuple val(meta), path(hist), path(ktab), path(prof)
output:
// ktab/prof globs use `hidden: true` so that dot-prefixed files are matched as well;
// both are optional because they are only expected when the corresponding inputs were supplied.
tuple val(meta), path("*.hist") , emit: hist
tuple val(meta), path("*.ktab*", hidden: true) , emit: ktab, optional: true
tuple val(meta), path("*.{prof,pidx}*", hidden: true), emit: prof, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
// Extra command-line options and the output name can be overridden through task.ext in the pipeline config.
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
// Command layout: Fastmerge <args> -T<cpus> <prefix> <hist files...>
// NOTE(review): the merged files are named after `prefix`, so it presumably must differ from the root names of the inputs - confirm.
"""
Fastmerge \\
$args \\
-T$task.cpus \\
${prefix} \\
$hist
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastk: $FASTK_VERSION
END_VERSIONS
"""
}

View file

@ -0,0 +1,60 @@
# Module documentation for FASTK_MERGE (modules/fastk/merge/main.nf).
# Channel entry names below mirror the names used in the process definition:
# inputs `hist`, `ktab`, `prof` and the outputs emitted as `hist`, `ktab`, `prof`.
name: "fastk_merge"
description: A tool to merge FastK histograms
keywords:
  - merge
  - k-mer
  - histogram
  - fastk
tools:
  - fastk:
      description: "A fast K-mer counter for high-fidelity shotgun datasets"
      homepage: "https://github.com/thegenemyers/FASTK"
      documentation: ""
      tool_dev_url: "https://github.com/thegenemyers/FASTK"
      doi: ""
      licence: "https://github.com/thegenemyers/FASTK/blob/master/LICENSE"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - hist:
      type: file
      description: Histogram files from the program FastK
      pattern: "*.hist"
  - ktab:
      type: file
      description: K-mer table (ktab) files from the program FastK (option -t)
      pattern: "*.ktab*"
  - prof:
      type: file
      description: K-mer profile (prof/pidx) files from the program FastK (option -p)
      pattern: "*.{prof,pidx}*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - hist:
      type: file
      description: Merged histogram file from the program FastK
      pattern: "*.hist"
  # ktab and prof are optional outputs of the process
  - ktab:
      type: file
      description: Merged k-mer table (ktab) files from the program FastK (option -t)
      pattern: "*.ktab*"
  - prof:
      type: file
      description: Merged k-mer profile (prof/pidx) files from the program FastK (option -p)
      pattern: "*.{prof,pidx}*"

authors:
  - "@mahesh-panchal"

View file

@ -695,6 +695,10 @@ fastk/histex:
- modules/fastk/histex/**
- tests/modules/fastk/histex/**
fastk/merge:
- modules/fastk/merge/**
- tests/modules/fastk/merge/**
fastp:
- modules/fastp/**
- tests/modules/fastp/**

View file

@ -8,6 +8,8 @@
md5sum: ceeacd0cb3aa69bf9b2a402830b40e26
- path: output/fastk/.test_fk.ktab.2
md5sum: f2629fd15b285aed3dc2d5fe546edf3f
- path: output/fastk/.test_fk.pidx.1
md5sum: 90bc384f61d2ecdb4586ab52ab04fddf
- path: output/fastk/.test_fk.prof.1
md5sum: ebd48923a724cf79934f0b2ed42ba73d
- path: output/fastk/test_fk.hist
@ -27,6 +29,10 @@
md5sum: 7f28fb44940fda799797e3069f5d7263
- path: output/fastk/.test_fk.ktab.2
md5sum: c14a85c128926ace78372f09029977b1
- path: output/fastk/.test_fk.pidx.1
md5sum: e7e760f714070a4afefb38ffff559684
- path: output/fastk/.test_fk.pidx.2
md5sum: a549612bbdba2506eb3311237638c4b0
- path: output/fastk/.test_fk.prof.1
md5sum: 46a5fd9e297262b058f8c1fd062fcf56
- path: output/fastk/.test_fk.prof.2

View file

@ -0,0 +1,58 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FASTK_FASTK } from '../../../../modules/fastk/fastk/main.nf'
include { FASTK_MERGE } from '../../../../modules/fastk/merge/main.nf'
//
// Runs FASTK_FASTK on two samples that share the same meta map and reads, then
// feeds the grouped results to FASTK_MERGE. Which FastK files are produced is
// controlled by the accompanying nextflow.config, not by this workflow.
//
workflow test_fastk_merge_hist_only {

    // Paired-end SARS-CoV-2 test reads, used for both samples
    paired_reads = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    // Identical meta maps, so groupTuple() collects both FastK runs under one key
    first_sample  = [ [ id:'test', single_end:false ], paired_reads ]
    second_sample = [ [ id:'test', single_end:false ], paired_reads ]

    FASTK_FASTK ( Channel.of( first_sample, second_sample ) )

    // Group each FastK output by meta
    ch_hist = FASTK_FASTK.out.hist.groupTuple()
    ch_ktab = FASTK_FASTK.out.ktab.groupTuple()
    ch_prof = FASTK_FASTK.out.prof.groupTuple()

    // remainder: true keeps the histogram entry even if an optional output was never emitted;
    // a missing ktab/prof is replaced by an empty list, otherwise the grouped files are flattened
    ch_merge_input = ch_hist
        .join( ch_ktab, remainder: true )
        .join( ch_prof, remainder: true )
        .map { meta, hist, ktab, prof ->
            def ktab_files = ktab ? ktab.flatten() : []
            def prof_files = prof ? prof.flatten() : []
            [ meta, hist, ktab_files, prof_files ]
        }

    FASTK_MERGE ( ch_merge_input )
}
//
// Same data flow as the hist-only test: two FASTK_FASTK runs on identical
// samples are grouped by meta and handed to FASTK_MERGE. The test's
// nextflow.config selects different FastK options for this entry workflow.
//
workflow test_fastk_merge_all_files {

    // Paired-end SARS-CoV-2 test reads, used for both samples
    reads_pair = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    // Identical meta maps, so groupTuple() collects both FastK runs under one key
    sample_a = [ [ id:'test', single_end:false ], reads_pair ]
    sample_b = [ [ id:'test', single_end:false ], reads_pair ]

    FASTK_FASTK ( Channel.of( sample_a, sample_b ) )

    // Group each FastK output by meta
    grouped_hist = FASTK_FASTK.out.hist.groupTuple()
    grouped_ktab = FASTK_FASTK.out.ktab.groupTuple()
    grouped_prof = FASTK_FASTK.out.prof.groupTuple()

    // remainder: true keeps the histogram entry even if an optional output was never emitted;
    // a missing ktab/prof is replaced by an empty list, otherwise the grouped files are flattened
    merge_input = grouped_hist
        .join( grouped_ktab, remainder: true )
        .join( grouped_prof, remainder: true )
        .map { meta, hist, ktab, prof ->
            def all_ktab = ktab ? ktab.flatten() : []
            def all_prof = prof ? prof.flatten() : []
            [ meta, hist, all_ktab, all_prof ]
        }

    FASTK_MERGE ( merge_input )
}

View file

@ -0,0 +1,14 @@
// Process configuration for the fastk/merge module tests.
process {
// Publish outputs under <outdir>/<tool>, where <tool> is the lower-cased first
// '_'-separated token of the process's simple name (e.g. FASTK_MERGE -> fastk).
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// In the all-files test, FastK is additionally run with -p so that profile files exist to merge.
// NOTE(review): this relies on the fully qualified selector taking precedence over the
// simple-name 'FASTK_FASTK' selector below, even though it is declared first - confirm against the Nextflow docs.
withName: 'test_fastk_merge_all_files:FASTK_FASTK' {
ext.args = '-t1 -p'
}
// Defaults for every FASTK_FASTK task: request the k-mer table (-t1), give each task a
// distinct output prefix via its task index, and do not publish these intermediate files.
withName: 'FASTK_FASTK' {
ext.args = '-t1'
ext.prefix = { "${meta.id}_${task.index}" }
publishDir = [ enabled: false ]
}
}

View file

@ -0,0 +1,39 @@
# pytest-workflow definitions for the fastk/merge module.
# Each entry runs one entry workflow and checks the md5sum of every published file.

# Entry workflow test_fastk_merge_hist_only: expects the merged histogram and the
# merged k-mer table (test.ktab plus its dot-prefixed .ktab.N files), but no profile files.
- name: fastk merge test_fastk_merge_hist_only
command: nextflow run ./tests/modules/fastk/merge -entry test_fastk_merge_hist_only -c ./tests/config/nextflow.config -c ./tests/modules/fastk/merge/nextflow.config
tags:
- fastk/merge
- fastk
files:
- path: output/fastk/.test.ktab.1
md5sum: dff1e9d326aea87778645235cfa3380f
- path: output/fastk/.test.ktab.2
md5sum: 438e7807dcc2eb8120e1338838147600
- path: output/fastk/test.hist
md5sum: 75d41eb0e3f8af5456711a95966e2b00
- path: output/fastk/test.ktab
md5sum: 7d882f4e3542df1a2f5cb0858b12dc03
# Entry workflow test_fastk_merge_all_files: same histogram and k-mer table checksums as above,
# plus the merged profile (test.prof and its dot-prefixed .prof.N / .pidx.N files).
- name: fastk merge test_fastk_merge_all_files
command: nextflow run ./tests/modules/fastk/merge -entry test_fastk_merge_all_files -c ./tests/config/nextflow.config -c ./tests/modules/fastk/merge/nextflow.config
tags:
- fastk/merge
- fastk
files:
- path: output/fastk/.test.ktab.1
md5sum: dff1e9d326aea87778645235cfa3380f
- path: output/fastk/.test.ktab.2
md5sum: 438e7807dcc2eb8120e1338838147600
- path: output/fastk/.test.pidx.1
md5sum: 7553344778f6571192f04c7e659b967c
- path: output/fastk/.test.pidx.2
md5sum: 09bb65fb10db58d1fd5bb0a123fccfc6
- path: output/fastk/.test.prof.1
md5sum: 7f0f84a62c89063ec18c9b0e447afe93
- path: output/fastk/.test.prof.2
md5sum: 7f0f84a62c89063ec18c9b0e447afe93
- path: output/fastk/test.hist
md5sum: 75d41eb0e3f8af5456711a95966e2b00
- path: output/fastk/test.ktab
md5sum: 7d882f4e3542df1a2f5cb0858b12dc03
- path: output/fastk/test.prof
md5sum: d3c7d8decd4ea6e298291b8be0e2de85