Add MerquryFK (#1778)

* Add MerquryFK

* Change resource allocation
This commit is contained in:
Mahesh Binzer-Panchal 2022-06-14 16:36:02 +02:00 committed by GitHub
parent fd5aea0b76
commit 8ded6d441a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 273 additions and 0 deletions

View file

@ -0,0 +1,55 @@
// Runs MerquryFK on a FastK k-mer table and a genome assembly and collects the
// completeness statistics, QV tables, BED file and any spectra plots it writes.
process MERQURYFK_MERQURYFK {
tag "$meta.id"
label 'process_medium'
// Conda is not supported for this module: stop with an explicit message.
if (params.enable_conda) {
error "Conda environments cannot be used when using the FastK tool. Please use docker or singularity containers."
}
container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.0'
input:
// fastk_hist is staged but never referenced in the command below.
// NOTE(review): presumably MerquryFK looks for it next to the .ktab file - confirm.
tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly)
output:
// Always-expected outputs.
tuple val(meta), path("${prefix}.completeness.stats") , emit: stats
tuple val(meta), path("${prefix}.*_only.bed") , emit: bed
tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv
// Copy-number spectra plots (fl / ln / st variants, PNG or PDF). All optional.
// NOTE(review): which of them exist presumably depends on the plot flags
// supplied through task.ext.args - confirm against the MerquryFK options.
tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png, optional: true
tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf, optional: true
tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png, optional: true
tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf, optional: true
tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png, optional: true
tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf, optional: true
tuple val(meta), path("${prefix}.qv") , emit: qv
// Assembly spectra plots, same fl / ln / st and PNG / PDF variants, all optional.
tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true
tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true
tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true
tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true
tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true
tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
// Deliberately not declared with `def`: the output: globs above interpolate
// ${prefix}, so the variable has to remain visible outside this script block.
prefix = task.ext.prefix ?: "${meta.id}"
// Versions are hard-coded rather than queried from the tools.
// NOTE(review): the values look like git commit hashes - confirm they match
// what the container image was built from.
def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0'
def MERQURY_VERSION = '8f3ab706e4cf4d7b7d1dfe5739859e3ebd26c494'
// Of the staged fastk_ktab entries, only the one whose name ends in ".ktab" is
// passed on the command line; the last positional argument is the output prefix.
"""
MerquryFK \\
$args \\
-T$task.cpus \\
${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\
$assembly \\
$prefix
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastk: $FASTK_VERSION
merquryfk: $MERQURY_VERSION
r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' )
END_VERSIONS
"""
}

View file

@ -0,0 +1,109 @@
# Module metadata for MERQURYFK_MERQURYFK: describes the tool, the input tuple
# (FastK histogram + k-mer table + assembly) and every emitted output channel.
name: "merquryfk_merquryfk"
description: FastK based version of Merqury
keywords:
  - merqury
  - k-mer
  - assembly evaluation
tools:
  - "merquryfk":
      description: "FastK based version of Merqury"
      homepage: "https://github.com/thegenemyers/MERQURY.FK"
      documentation: ""
      tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK"
      doi: ""
      licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE"
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastk_hist:
      type: file
      description: A histogram file from the program FastK
      pattern: "*.hist"
  - fastk_ktab:
      type: file
      description: k-mer table (ktab) files from the program FastK (option -t)
      pattern: "*.ktab*"
  - assembly:
      type: file
      description: Genome assembly files (fasta format)
      pattern: "*.fasta"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - stats:
      type: file
      description: Assembly statistics file
      pattern: "*.completeness.stats"
  - bed:
      type: file
      description: Assembly only kmer positions not supported by reads in bed format
      pattern: "*_only.bed"
  # Copy-number and assembly spectra plots; every plot output is optional.
  - spectra_cn_fl_png:
      type: file
      description: "Unstacked copy number spectra filled plot in PNG format"
      pattern: "*.spectra-cn.fl.png"
  - spectra_cn_ln_png:
      type: file
      description: "Unstacked copy number spectra line plot in PNG format"
      pattern: "*.spectra-cn.ln.png"
  - spectra_cn_st_png:
      type: file
      description: "Stacked copy number spectra line plot in PNG format"
      pattern: "*.spectra-cn.st.png"
  - spectra_asm_fl_png:
      type: file
      description: "Unstacked assembly spectra filled plot in PNG format"
      pattern: "*.spectra-asm.fl.png"
  - spectra_asm_ln_png:
      type: file
      description: "Unstacked assembly spectra line plot in PNG format"
      pattern: "*.spectra-asm.ln.png"
  - spectra_asm_st_png:
      type: file
      description: "Stacked assembly spectra line plot in PNG format"
      pattern: "*.spectra-asm.st.png"
  - spectra_cn_fl_pdf:
      type: file
      description: "Unstacked copy number spectra filled plot in PDF format"
      pattern: "*.spectra-cn.fl.pdf"
  - spectra_cn_ln_pdf:
      type: file
      description: "Unstacked copy number spectra line plot in PDF format"
      pattern: "*.spectra-cn.ln.pdf"
  - spectra_cn_st_pdf:
      type: file
      description: "Stacked copy number spectra line plot in PDF format"
      pattern: "*.spectra-cn.st.pdf"
  - spectra_asm_fl_pdf:
      type: file
      description: "Unstacked assembly spectra filled plot in PDF format"
      pattern: "*.spectra-asm.fl.pdf"
  - spectra_asm_ln_pdf:
      type: file
      description: "Unstacked assembly spectra line plot in PDF format"
      pattern: "*.spectra-asm.ln.pdf"
  - spectra_asm_st_pdf:
      type: file
      description: "Stacked assembly spectra line plot in PDF format"
      pattern: "*.spectra-asm.st.pdf"
  # Per-assembly table is named <prefix>.<assembly>.qv; the overall one is <prefix>.qv.
  - assembly_qv:
      type: file
      description: "error and qv table for each scaffold of the assembly"
      pattern: "*.*.qv"
  - qv:
      type: file
      description: "error and qv of each assembly as a whole"
      pattern: "*.qv"
authors:
  - "@mahesh-panchal"

View file

@ -1330,6 +1330,10 @@ meningotype:
- modules/meningotype/**
- tests/modules/meningotype/**
# Path globs associated with the merquryfk/merquryfk tag: the module sources
# and its tests.
# NOTE(review): presumably used to pick which module tests to run - confirm against CI.
merquryfk/merquryfk:
- modules/merquryfk/merquryfk/**
- tests/modules/merquryfk/merquryfk/**
meryl/count:
- modules/meryl/count/**
- tests/modules/meryl/count/**

View file

@ -0,0 +1,42 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FASTK_FASTK } from '../../../../modules/fastk/fastk/main.nf'
include { MERQURYFK_MERQURYFK } from '../../../../modules/merquryfk/merquryfk/main.nf'
// Test entry point: counts k-mers from one read file with FastK, then runs
// MerquryFK on the resulting histogram and k-mer table plus a genome assembly.
// The entry name lets configuration target this run separately from its
// sibling test workflow.
workflow test_merquryfk_merquryfk_png {

    // Single-end Illumina reads handed to FastK
    reads = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]

    // Assembly to evaluate; it carries the same meta map so that it can be
    // joined onto the FastK outputs by key
    genome = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    ]

    FASTK_FASTK ( reads )

    // [ meta, hist, ktab ]
    fastk_tables = FASTK_FASTK.out.hist.join( FASTK_FASTK.out.ktab )

    // [ meta, hist, ktab, assembly ]
    merquryfk_input = fastk_tables.join( Channel.value( genome ) )

    MERQURYFK_MERQURYFK ( merquryfk_input )
}
// Test entry point with the same data flow as the other test workflow in this
// file: FastK k-mer counting followed by MerquryFK. Only the entry name
// differs, which lets configuration apply different MerquryFK arguments.
workflow test_merquryfk_merquryfk_pdf {

    // Single-end Illumina reads handed to FastK
    reads = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]

    // Assembly to evaluate; it carries the same meta map so that it can be
    // joined onto the FastK outputs by key
    genome = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    ]

    FASTK_FASTK ( reads )

    // [ meta, hist, ktab ]
    fastk_tables = FASTK_FASTK.out.hist.join( FASTK_FASTK.out.ktab )

    // [ meta, hist, ktab, assembly ]
    merquryfk_input = fastk_tables.join( Channel.value( genome ) )

    MERQURYFK_MERQURYFK ( merquryfk_input )
}

View file

@ -0,0 +1,16 @@
// Per-process settings for the MerquryFK module tests.
process {
// Default publish directory: the last ':'-separated part of the process name,
// cut at its first '_' and lower-cased (MERQURYFK_MERQURYFK -> "merquryfk").
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// FastK only feeds MerquryFK here, so its outputs are not published.
// NOTE(review): '-t' presumably makes FastK write the k-mer table - confirm.
withName: 'FASTK_.*' {
ext.args = '-t'
publishDir = [ enabled: false ]
}
// The two selectors below are qualified by workflow name, so each test entry
// point receives its own MerquryFK arguments.
// NOTE(review): '-lfs' presumably requests the ln / fl / st plot variants and
// '-pdf' switches the plot format to PDF - confirm against the MerquryFK usage.
withName: 'test_merquryfk_merquryfk_png:MERQURYFK_MERQURYFK' {
ext.args = '-lfs'
}
withName: 'test_merquryfk_merquryfk_pdf:MERQURYFK_MERQURYFK' {
ext.args = '-lfs -pdf'
}
}

View file

@ -0,0 +1,47 @@
# Expected outputs of the PNG test run; every file is pinned by md5sum.
- name: merquryfk merquryfk test_merquryfk_merquryfk_png
command: nextflow run ./tests/modules/merquryfk/merquryfk -entry test_merquryfk_merquryfk_png -c ./tests/config/nextflow.config -c ./tests/modules/merquryfk/merquryfk/nextflow.config
tags:
- merquryfk/merquryfk
- merquryfk
files:
- path: output/merquryfk/test.completeness.stats
md5sum: 797224fa75606bbda7f62caae7c9151c
- path: output/merquryfk/test.genome.qv
md5sum: 1e11be8abfa2230024042832f58f96f9
# NOTE(review): the three spectra-cn PNGs below share one md5sum although they
# are different plot variants - confirm this is expected for the test data and
# not a copy-paste of a single checksum.
- path: output/merquryfk/test.genome.spectra-cn.fl.png
md5sum: 50bad6b85b8f80b8411b50e4119bc959
- path: output/merquryfk/test.genome.spectra-cn.ln.png
md5sum: 50bad6b85b8f80b8411b50e4119bc959
- path: output/merquryfk/test.genome.spectra-cn.st.png
md5sum: 50bad6b85b8f80b8411b50e4119bc959
- path: output/merquryfk/test.genome_only.bed
md5sum: 662d73e8a0019708feb538ec53c220f7
- path: output/merquryfk/test.qv
md5sum: 02b4d5c639ca706ff707bad89e29d90e
- path: output/merquryfk/test.spectra-asm.fl.png
md5sum: bb9ac38fe8991fbacbe791e562626d89
- path: output/merquryfk/test.spectra-asm.ln.png
md5sum: 48534a9d5e2ce9365eb1f0397c76f337
- path: output/merquryfk/test.spectra-asm.st.png
md5sum: 5e031a8b9ba2800e291721a11f4d9c00
# Expected outputs of the PDF test run. The text outputs carry the same
# checksums as in the PNG run; the PDF plots are listed without an md5sum, so
# they are only checked for existence.
- name: merquryfk merquryfk test_merquryfk_merquryfk_pdf
command: nextflow run ./tests/modules/merquryfk/merquryfk -entry test_merquryfk_merquryfk_pdf -c ./tests/config/nextflow.config -c ./tests/modules/merquryfk/merquryfk/nextflow.config
tags:
- merquryfk/merquryfk
- merquryfk
files:
- path: output/merquryfk/test.completeness.stats
md5sum: 797224fa75606bbda7f62caae7c9151c
- path: output/merquryfk/test.genome.qv
md5sum: 1e11be8abfa2230024042832f58f96f9
- path: output/merquryfk/test.genome.spectra-cn.fl.pdf
- path: output/merquryfk/test.genome.spectra-cn.ln.pdf
- path: output/merquryfk/test.genome.spectra-cn.st.pdf
- path: output/merquryfk/test.genome_only.bed
md5sum: 662d73e8a0019708feb538ec53c220f7
- path: output/merquryfk/test.qv
md5sum: 02b4d5c639ca706ff707bad89e29d90e
- path: output/merquryfk/test.spectra-asm.fl.pdf
- path: output/merquryfk/test.spectra-asm.ln.pdf
- path: output/merquryfk/test.spectra-asm.st.pdf