feat: add module for seqkit stats (#1466)

This commit is contained in:
Moritz E. Beber 2022-03-30 23:01:17 +02:00 committed by GitHub
parent fd5f6f5f4f
commit 0de6406217
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 199 additions and 0 deletions

View file

@ -0,0 +1,34 @@
// Run `seqkit stats` on the staged sequence file(s) of one sample and emit
// the resulting tab-separated table together with a versions.yml record.
//
// Inputs : tuple of a meta map (its `id` is used for tagging and as the
//          default output prefix) and one or more sequence files.
// Outputs: `stats`    - tuple of the meta map and the generated *.tsv table
//          `versions` - versions.yml holding the seqkit version string
process SEQKIT_STATS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::seqkit=2.2.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0':
        'quay.io/biocontainers/seqkit:2.2.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.tsv"), emit: stats
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // `--all` is only a fallback: any value supplied through `ext.args`
    // replaces it entirely.
    def args = task.ext.args ?: '--all'
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The tool entry in the heredoc is indented relative to the process key
    // so that versions.yml is a nested mapping (process name -> tool -> version).
    """
    seqkit stats \\
        --tabular \\
        $args \\
        $reads > '${prefix}.tsv'

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,44 @@
# Module metadata for SEQKIT_STATS: describes the wrapped tool and the
# input/output channels of the process.
name: "seqkit_stats"
description: simple statistics of FASTA/Q files
keywords:
  - seqkit
  - stats
tools:
  - "seqkit":
      description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
      # Project landing page; the usage manual is listed under `documentation`.
      homepage: https://bioinf.shenwei.me/seqkit/
      documentation: https://bioinf.shenwei.me/seqkit/usage/
      tool_dev_url: https://github.com/shenwei356/seqkit/
      doi: "10.1371/journal.pone.0163962"
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: >
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: >
        Either FASTA or FASTQ files.
      # `{,.gz}` marks the gzip suffix as optional; `[.gz]` would be a
      # single-character class in glob syntax.
      pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz}"
output:
  - meta:
      type: map
      description: >
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - stats:
      type: file
      description: >
        Tab-separated output file with basic sequence statistics.
      pattern: "*.tsv"
authors:
  - "@Midnighter"

View file

@ -1591,6 +1591,10 @@ seqkit/split2:
- modules/seqkit/split2/**
- tests/modules/seqkit/split2/**
seqkit/stats:
- modules/seqkit/stats/**
- tests/modules/seqkit/stats/**
seqsero2:
- modules/seqsero2/**
- tests/modules/seqsero2/**

View file

@ -0,0 +1,58 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SEQKIT_STATS } from '../../../../modules/seqkit/stats/main.nf'
// Runs SEQKIT_STATS on a single gzipped Illumina FASTQ file.
workflow test_seqkit_stats_single_end {
    meta  = [ id:'test', single_end:true ]
    reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    SEQKIT_STATS ( [ meta, reads ] )
}
// Runs SEQKIT_STATS on both gzipped FASTQ files of the paired-end Illumina
// test sample, passed together as one list of reads.
workflow test_seqkit_stats_paired_end {
    illumina = params.test_data['sarscov2']['illumina']

    meta  = [ id:'test', single_end:false ]
    reads = [
        file(illumina['test_1_fastq_gz'], checkIfExists: true),
        file(illumina['test_2_fastq_gz'], checkIfExists: true)
    ]

    SEQKIT_STATS ( [ meta, reads ] )
}
// Runs SEQKIT_STATS on the gzipped Nanopore FASTQ test file.
workflow test_seqkit_stats_nanopore {

    input = [
        // A single FASTQ file is supplied, so the sample is flagged as
        // single-end, matching the single-file Illumina test case.
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)
    ]

    SEQKIT_STATS ( input )
}
// Runs SEQKIT_STATS on the genome FASTA test file.
workflow test_seqkit_stats_genome_fasta {
    meta  = [ id:'test', single_end:false ]
    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    SEQKIT_STATS ( [ meta, fasta ] )
}
// Runs SEQKIT_STATS on the transcriptome FASTA test file.
workflow test_seqkit_stats_transcriptome_fasta {
    meta  = [ id:'test', single_end:false ]
    fasta = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)

    SEQKIT_STATS ( [ meta, fasta ] )
}

View file

@ -0,0 +1,5 @@
// Publish every process's outputs beneath `params.outdir`, in a directory
// derived from the process name.
process {

    publishDir = {
        // Drop any enclosing workflow scopes (colon-separated), keeping only
        // the trailing process name, e.g. `SEQKIT_STATS`.
        def process_name = task.process.tokenize(':')[-1]
        // Keep the part before the first underscore, lower-cased, e.g. `seqkit`.
        def tool_dir = process_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,54 @@
# Test cases for the seqkit/stats module. Each entry runs one entry workflow
# from tests/modules/seqkit/stats and checks the md5sum of the statistics
# table and of versions.yml.

# Single gzipped Illumina FASTQ file.
- name: seqkit stats test_seqkit_stats_single_end
  command: nextflow run tests/modules/seqkit/stats -entry test_seqkit_stats_single_end -c tests/config/nextflow.config
  tags:
    - seqkit/stats
    - seqkit
  files:
    - path: output/seqkit/test.tsv
      md5sum: e23227d089a7e04b0ec0cb547c4aadff
    - path: output/seqkit/versions.yml
      md5sum: d67f0c16feb9df77b11f6c91bbdf9926

# Both FASTQ files of the paired-end Illumina sample.
- name: seqkit stats test_seqkit_stats_paired_end
  command: nextflow run tests/modules/seqkit/stats -entry test_seqkit_stats_paired_end -c tests/config/nextflow.config
  tags:
    - seqkit/stats
    - seqkit
  files:
    - path: output/seqkit/test.tsv
      md5sum: 9de20dc39fb01285e3f0c382fda9db52
    - path: output/seqkit/versions.yml
      md5sum: bd8881933b953d07f2600e2e6a88ebf3

# Gzipped Nanopore FASTQ file.
- name: seqkit stats test_seqkit_stats_nanopore
  command: nextflow run tests/modules/seqkit/stats -entry test_seqkit_stats_nanopore -c tests/config/nextflow.config
  tags:
    - seqkit/stats
    - seqkit
  files:
    - path: output/seqkit/test.tsv
      md5sum: 5da1709eb5ae64fa3b2d624bffe2e7aa
    - path: output/seqkit/versions.yml
      md5sum: 565632701fbe048f7ba99f1865bd48ca

# Genome FASTA file.
- name: seqkit stats test_seqkit_stats_genome_fasta
  command: nextflow run tests/modules/seqkit/stats -entry test_seqkit_stats_genome_fasta -c tests/config/nextflow.config
  tags:
    - seqkit/stats
    - seqkit
  files:
    - path: output/seqkit/test.tsv
      md5sum: f64489767a4e769539ef3faf83260184
    - path: output/seqkit/versions.yml
      md5sum: 782fcdeaa922c8bb532ffa5808849d87

# Transcriptome FASTA file.
- name: seqkit stats test_seqkit_stats_transcriptome_fasta
  command: nextflow run tests/modules/seqkit/stats -entry test_seqkit_stats_transcriptome_fasta -c tests/config/nextflow.config
  tags:
    - seqkit/stats
    - seqkit
  files:
    - path: output/seqkit/test.tsv
      md5sum: fbb975b665a08c8862fcd1268613a945
    - path: output/seqkit/versions.yml
      md5sum: db99b016d986d26102ec398264a58410