Add MerquryFK KatGC (#1798)

* Add KAT GC

* Add additional container warning
This commit is contained in:
Mahesh Binzer-Panchal 2022-06-23 20:57:15 +02:00 committed by GitHub
parent 2f0b192404
commit a7193dc628
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 180 additions and 0 deletions

View file

@ -0,0 +1,44 @@
// Runs MerquryFK's KatGC on a FastK k-mer table and collects the GC content
// plots it writes (filled, line and stacked variants, as PNG or PDF).
//
// Input : tuple of meta map, FastK histogram file(s) and FastK ktab file(s).
// Output: one optional channel per plot flavour/format, plus versions.yml.
process MERQURYFK_KATGC {
    tag "$meta.id"
    label 'process_medium'

    // This module ships as a container only; abort early when conda is requested.
    if (params.enable_conda) {
        error "Conda environments cannot be used when using the FastK tool. Please use docker or singularity containers."
    }
    container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.0' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.

    input:
    // fastk_hist is staged into the work directory but not passed on the command line.
    tuple val(meta), path(fastk_hist), path(fastk_ktab)

    output:
    // Every plot is optional: which files appear depends on the arguments given via task.ext.args.
    tuple val(meta), path("*.fi.png"), emit: filled_gc_plot_png , optional: true
    tuple val(meta), path("*.fi.pdf"), emit: filled_gc_plot_pdf , optional: true
    tuple val(meta), path("*.ln.png"), emit: line_gc_plot_png   , optional: true
    tuple val(meta), path("*.ln.pdf"), emit: line_gc_plot_pdf   , optional: true
    tuple val(meta), path("*.st.png"), emit: stacked_gc_plot_png, optional: true
    tuple val(meta), path("*.st.pdf"), emit: stacked_gc_plot_pdf, optional: true
    path "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    def MERQURY_VERSION = '8f3ab706e4cf4d7b7d1dfe5739859e3ebd26c494' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // Pick the staged file whose name ends in ".ktab" as the table argument for KatGC.
    def ktab = fastk_ktab.find { it.toString().endsWith(".ktab") }
    if (ktab == null) {
        // Without this guard the literal string "null" would be interpolated into the command.
        error "MERQURYFK_KATGC: no file ending in '.ktab' was found among the FastK table inputs for sample '${meta.id}'."
    }
    """
    KatGC \\
        $args \\
        -T$task.cpus \\
        $ktab \\
        $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fastk: $FASTK_VERSION
        merquryfk: $MERQURY_VERSION
        r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,66 @@
# Module metadata for MERQURYFK_KATGC: describes the wrapped tool and every
# input and output channel of the process.
name: "merquryfk_katgc"
description: A reimplementation of Kat GC to work with FastK databases
keywords:
  - k-mer
  - GC content
tools:
  - "merquryfk":
      description: "FastK based version of Merqury"
      homepage: "https://github.com/thegenemyers/MERQURY.FK"
      documentation: ""
      tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK"
      doi: ""
      licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastk_hist:
      type: file
      description: A histogram file from the program FastK
      pattern: "*.hist"
  - fastk_ktab:
      type: file
      description: ktab files from the program FastK
      pattern: "*.ktab*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # One entry per plot flavour (filled, line, stacked) and format (PNG, PDF).
  - filled_gc_plot_png:
      type: file
      description: A filled GC content plot in PNG format
      pattern: "*.fi.png"
  - filled_gc_plot_pdf:
      type: file
      description: A filled GC content plot in PDF format
      pattern: "*.fi.pdf"
  - line_gc_plot_png:
      type: file
      description: A line GC content plot in PNG format
      pattern: "*.ln.png"
  - line_gc_plot_pdf:
      type: file
      description: A line GC content plot in PDF format
      pattern: "*.ln.pdf"
  - stacked_gc_plot_png:
      type: file
      description: A stacked GC content plot in PNG format
      pattern: "*.st.png"
  - stacked_gc_plot_pdf:
      type: file
      description: A stacked GC content plot in PDF format
      pattern: "*.st.pdf"

authors:
  - "@mahesh-panchal"

View file

@ -1354,6 +1354,10 @@ merquryfk/katcomp:
- modules/merquryfk/katcomp/**
- tests/modules/merquryfk/katcomp/**
# Path globs belonging to the merquryfk/katgc module: its sources and its tests.
merquryfk/katgc:
- modules/merquryfk/katgc/**
- tests/modules/merquryfk/katgc/**
merquryfk/merquryfk:
- modules/merquryfk/merquryfk/**
- tests/modules/merquryfk/merquryfk/**

View file

@ -0,0 +1,32 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FASTK_FASTK } from '../../../../modules/fastk/fastk/main.nf'
include { MERQURYFK_KATGC } from '../../../../modules/merquryfk/katgc/main.nf'
// Test entry point: builds a FastK database from one FASTQ file and passes
// the resulting histogram and k-mer table to MERQURYFK_KATGC.
workflow test_merquryfk_katgc_png {

    reads = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]

    FASTK_FASTK ( reads )

    // Combine the two FastK outputs into the (meta, hist, ktab) tuple KatGC expects;
    // join matches items on their first element, here the meta map.
    katgc_input = FASTK_FASTK.out.hist.join( FASTK_FASTK.out.ktab )

    MERQURYFK_KATGC ( katgc_input )
}
// Test entry point with the same wiring as the PNG test; the workflow name is
// what lets a config selector of the form '<workflow>:MERQURYFK_KATGC' target
// this run specifically.
workflow test_merquryfk_katgc_pdf {

    reads = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]

    FASTK_FASTK ( reads )

    // Combine the two FastK outputs into the (meta, hist, ktab) tuple KatGC expects;
    // join matches items on their first element, here the meta map.
    katgc_input = FASTK_FASTK.out.hist.join( FASTK_FASTK.out.ktab )

    MERQURYFK_KATGC ( katgc_input )
}

View file

@ -0,0 +1,12 @@
// Process-level settings for the merquryfk/katgc module tests.
process {
// Default publish location: params.outdir plus a sub-directory named after the
// lower-cased first '_'-separated token of the last ':'-separated segment of the
// process name (e.g. 'wf:MERQURYFK_KATGC' -> 'merquryfk').
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// Applies to every process whose name matches FASTK_.*: pass '-t' and do not publish its outputs.
// NOTE(review): '-t' presumably makes FastK emit the k-mer table consumed downstream; confirm against the FastK docs.
withName: 'FASTK_.*' {
ext.args = '-t'
publishDir = [ enabled: false ]
}
// Only the KatGC call inside the test_merquryfk_katgc_pdf workflow gets '-pdf';
// the selector includes the workflow name, so the PNG test keeps the tool's default arguments.
withName: 'test_merquryfk_katgc_pdf:MERQURYFK_KATGC' {
ext.args = '-pdf'
}
}

View file

@ -0,0 +1,22 @@
# PNG run: the three plot files must exist and match the recorded checksums.
- name: merquryfk katgc test_merquryfk_katgc_png
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/merquryfk/katgc
    -entry test_merquryfk_katgc_png
    -c ./tests/config/nextflow.config
    -c ./tests/modules/merquryfk/katgc/nextflow.config
  tags:
    - merquryfk
    - merquryfk/katgc
  files:
    - path: output/merquryfk/test.fi.png
      md5sum: 'bd4cf549d5c8eaaba82f78010f652dac'
    - path: output/merquryfk/test.ln.png
      md5sum: '3f55f27a486c9be6187d71e3a570beb5'
    - path: output/merquryfk/test.st.png
      md5sum: '636339e17af69c4b64f27f36d66edcd8'
# PDF run: only the existence of the three plot files is checked (no checksums recorded).
- name: merquryfk katgc test_merquryfk_katgc_pdf
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/merquryfk/katgc
    -entry test_merquryfk_katgc_pdf
    -c ./tests/config/nextflow.config
    -c ./tests/modules/merquryfk/katgc/nextflow.config
  tags:
    - merquryfk
    - merquryfk/katgc
  files:
    - path: output/merquryfk/test.fi.pdf
    - path: output/merquryfk/test.ln.pdf
    - path: output/merquryfk/test.st.pdf