Add module kat hist

This commit is contained in:
Mahesh Binzer-Panchal 2022-05-06 07:54:10 +00:00
parent 6bb9be38c5
commit 9194fb845e
6 changed files with 177 additions and 0 deletions

42
modules/kat/hist/main.nf Normal file
View file

@ -0,0 +1,42 @@
// KAT_HIST: runs `kat hist` on the reads of one sample and emits the k-mer
// histogram, its distribution-analysis JSON, any plots / jellyfish hash that
// were produced, and a versions.yml recording the kat version used.
process KAT_HIST {
    tag "$meta.id"
    label 'process_medium'

    // kat 2.4.2 is pinned identically for conda, Singularity and Docker.
    conda (params.enable_conda ? "bioconda::kat=2.4.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2':
        'quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2' }"

    input:
    // meta : sample map; meta.id is used for the task tag and the default prefix
    // reads: input sequence file(s), passed to kat as positional arguments
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.hist")                   , emit: hist
    tuple val(meta), path("*.hist.dist_analysis.json"), emit: json
    // Plots and the jellyfish hash are optional: not every invocation writes them.
    tuple val(meta), path("*.png")                    , emit: png           , optional: true
    tuple val(meta), path("*.ps")                     , emit: ps            , optional: true
    tuple val(meta), path("*.pdf")                    , emit: pdf           , optional: true
    tuple val(meta), path("*-hash.jf*")               , emit: jellyfish_hash, optional: true
    path "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output prefix can be overridden via task.ext.
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    kat hist \\
        --threads $task.cpus \\
        --output_prefix ${prefix}.hist \\
        $args \\
        $reads

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kat: \$( kat hist --version | sed 's/kat //' )
    END_VERSIONS
    """
}

64
modules/kat/hist/meta.yml Normal file
View file

@ -0,0 +1,64 @@
# Module metadata for kat/hist: describes the tool, the input channel and
# every output channel emitted by the KAT_HIST process.
name: "kat_hist"
description: Creates a histogram of the number of distinct k-mers having a given frequency.
keywords:
  - k-mer
  - histogram
  - count
tools:
  - "kat":
      description: "KAT is a suite of tools that analyse jellyfish hashes or sequence files (fasta or fastq) using kmer counts"
      homepage: https://www.earlham.ac.uk/kat-tools
      documentation: https://kat.readthedocs.io/en/latest/index.html
      tool_dev_url: https://github.com/TGAC/KAT
      # A bare DOI rather than a journal URL.
      doi: "10.1093/bioinformatics/btw663"
      licence: "['GPL v3']"
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - hist:
      type: file
      description: KAT histogram of k-mer counts
      pattern: "*.hist"
  - json:
      type: file
      description: KAT histogram summary of distance analysis
      pattern: "*.hist.dist_analysis.json"
  - png:
      type: file
      description: KAT plot of k-mer histogram in PNG format
      pattern: "*.png"
  - ps:
      type: file
      description: KAT plot of k-mer histogram in PS format
      pattern: "*.ps"
  - pdf:
      type: file
      description: KAT plot of k-mer histogram in PDF format
      pattern: "*.pdf"
  - jellyfish_hash:
      type: file
      description: Jellyfish hash file
      # Must match the glob used by the process output ("*-hash.jf*").
      pattern: "*-hash.jf*"
authors:
  - "@mahesh-panchal"

View file

@ -1041,6 +1041,10 @@ kallistobustools/ref:
- modules/kallistobustools/ref/**
- tests/modules/kallistobustools/ref/**

# Paths whose changes trigger the kat/hist module tests.
kat/hist:
- modules/kat/hist/**
- tests/modules/kat/hist/**

khmer/normalizebymedian:
- modules/khmer/normalizebymedian/**
- tests/modules/khmer/normalizebymedian/**

View file

@ -0,0 +1,28 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { KAT_HIST } from '../../../../modules/kat/hist/main.nf'
// Single-end case: feeds KAT_HIST one FastQ file together with a
// single_end:true meta map.
workflow test_kat_hist_single_end {

    fastq = file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // Channel item is [ meta map, reads ]
    KAT_HIST ( [ [ id:'test', single_end:true ], fastq ] )
}
// Paired-end case: feeds KAT_HIST both mates of the test read pair together
// with a single_end:false meta map.
workflow test_kat_hist_paired_end {

    sample = [ id:'test', single_end:false ]

    // Resolve both mates from the shared test-data map, in R1, R2 order.
    fastqs = ['test_1_fastq_gz', 'test_2_fastq_gz'].collect { key ->
        file(params.test_data['homo_sapiens']['illumina'][key], checkIfExists: true)
    }

    // Channel item is [ meta map, [ reads_1, reads_2 ] ]
    KAT_HIST ( [ sample, fastqs ] )
}

View file

@ -0,0 +1,9 @@
// Process-scope settings for the kat/hist module tests.
process {
    // Publish into a directory named after the tool: take the last
    // ':'-separated component of the process name, keep the part before the
    // first '_', and lowercase it (e.g. '...:KAT_HIST' -> 'kat').
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
    // Only the single-end test passes an extra option to the tool.
    // NOTE(review): '-d' is presumably kat's dump-hash flag, which would explain
    // the jellyfish hash file expected by the single-end test; confirm against kat docs.
    withName: 'test_kat_hist_single_end:KAT_HIST' {
        ext.args = '-d'
    }
}

View file

@ -0,0 +1,30 @@
# Single-end run: checks the histogram, JSON summary, PNG plot and versions
# file by checksum, and the jellyfish hash by existence only.
- name: kat hist test_kat_hist_single_end
  command: nextflow run tests/modules/kat/hist -entry test_kat_hist_single_end -c tests/config/nextflow.config
  tags:
    - kat/hist
    - kat
  files:
    - path: output/kat/test.hist
      md5sum: 1aceb823b6774f14f5cca15954138efd
    # No md5sum here: only the presence of the hash file is asserted.
    # NOTE(review): presumably the binary hash is not byte-reproducible; confirm.
    - path: output/kat/test.hist-hash.jf27
    - path: output/kat/test.hist.dist_analysis.json
      md5sum: ec4317d510f752855411d13c0f24dea9
    - path: output/kat/test.hist.png
      md5sum: 30daaf4d2dd4b1574e387df973c39d03
    - path: output/kat/versions.yml
      md5sum: 296f28c007bc55f8f5490702cf3f81d4
# Paired-end run: checks the histogram, JSON summary, PNG plot and versions
# file by checksum. No jellyfish hash file is listed for this entry.
- name: kat hist test_kat_hist_paired_end
  command: nextflow run tests/modules/kat/hist -entry test_kat_hist_paired_end -c tests/config/nextflow.config
  tags:
    - kat/hist
    - kat
  files:
    - path: output/kat/test.hist
      md5sum: c4e8e01996bd8f676e02690220e1def7
    - path: output/kat/test.hist.dist_analysis.json
      md5sum: 954a3d1c3fc56f80f0929315ed299824
    - path: output/kat/test.hist.png
      md5sum: 8a21ce0965342234f5982f947f4f10b0
    - path: output/kat/versions.yml
      md5sum: 3a07329af7ce78e76b021996cd08eacb