New module: fasta_windows (#1956)

* New module for fasta_windows

* Upgraded the code and the test to v2.4

* Standard list of Fasta file extensions

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Output file (and/or directory) names SHOULD just consist of only ${prefix}

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Matthieu Muffato 2022-09-09 08:41:38 +01:00 committed by GitHub
parent b444b084fa
commit 4f78d40b11
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 136 additions and 0 deletions

View file

@ -0,0 +1,40 @@
// Runs fasta_windows on one FASTA file and emits the per-window TSV tables
// matched under fw_out/, plus a versions.yml recording the tool version.
//
// Input:  tuple(meta, fasta) - meta map (meta.id is used as tag and default prefix) and the FASTA file.
// Output: freq / mononuc / dinuc / trinuc / tetranuc - tuple(meta, TSV) per table; versions - versions.yml.
process FASTAWINDOWS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::fasta_windows=0.2.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/fasta_windows:0.2.4--hec16e2b_0':
        'quay.io/biocontainers/fasta_windows:0.2.4--hec16e2b_0' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("fw_out/*_freq_windows.tsv")    , emit: freq
    tuple val(meta), path("fw_out/*_mononuc_windows.tsv") , emit: mononuc
    tuple val(meta), path("fw_out/*_dinuc_windows.tsv")   , emit: dinuc
    tuple val(meta), path("fw_out/*_trinuc_windows.tsv")  , emit: trinuc
    tuple val(meta), path("fw_out/*_tetranuc_windows.tsv"), emit: tetranuc
    path "versions.yml"                                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output prefix can be overridden via task.ext.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The script first removes any existing fw_out directory, then runs the tool with
    // RAYON_NUM_THREADS set to the task's CPU count (presumably caps the tool's Rayon
    // thread pool - confirm against the fasta_windows docs).
    // The versions.yml body must keep the tool line indented deeper than the process-name
    // line so the version is nested under the process key instead of being a sibling key.
    """
    rm -rf fw_out
    env RAYON_NUM_THREADS=$task.cpus \\
        fasta_windows \\
        $args \\
        --fasta $fasta \\
        --output ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fasta_windows: \$(fasta_windows --version | cut -d' ' -f3)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,57 @@
# Module metadata: describes the wrapped tool and the module's input/output channels.
name: "fastawindows"
description: Quickly compute statistics over a fasta file in windows.
keywords:
  - genome
  - fasta
  - tsv
  - bed
tools:
  - "fastawindows":
      description: "fasta_windows is a tool written for Darwin Tree of Life chromosomal level genome assemblies. The executable takes a fasta formatted file and calculates some statistics of interest in windows"
      homepage: "https://github.com/tolkit/fasta_windows"
      documentation: "None"
      licence: "['MIT']"

# Each channel entry is a single-key map whose value holds type/description/pattern.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: FASTA file
      pattern: "*.{fa,fasta,fna}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - freq:
      type: file
      description: TSV file with frequencies and statistics
      pattern: "*.{tsv}"
  - mononuc:
      type: file
      description: TSV file with mononucleotide counts
      pattern: "*.{tsv}"
  - dinuc:
      type: file
      description: TSV file with dinucleotide counts
      pattern: "*.{tsv}"
  - trinuc:
      type: file
      description: TSV file with trinucleotide counts
      pattern: "*.{tsv}"
  - tetranuc:
      type: file
      description: TSV file with tetranucleotide counts
      pattern: "*.{tsv}"

authors:
  - "@muffato"

View file

@ -739,6 +739,10 @@ fastani:
- modules/fastani/**
- tests/modules/fastani/**
# Path globs registered under the fastawindows key: module sources and their tests.
fastawindows:
  - modules/fastawindows/**
  - tests/modules/fastawindows/**
# Path globs registered under the fastk/fastk key: module sources and their tests.
fastk/fastk:
  - modules/fastk/fastk/**
  - tests/modules/fastk/fastk/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FASTAWINDOWS } from '../../../modules/fastawindows/main.nf'
// Test entry workflow: passes a single [ meta, fasta ] pair to FASTAWINDOWS.
workflow test_fastawindows {

    // meta map for the sample
    sample_meta  = [ id:'test' ]
    // FASTA path looked up in params.test_data; file() fails if it does not exist
    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    FASTAWINDOWS ( [ sample_meta, genome_fasta ] )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish into <outdir>/<name>, where <name> is derived from the process name:
    // last ':'-separated component, then the text before its first '_', lower-cased.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }
}

View file

@ -0,0 +1,15 @@
# One test case: the command to run, its tags, and the expected output files with checksums.
- name: fastawindows test_fastawindows
  command: nextflow run ./tests/modules/fastawindows -entry test_fastawindows -c ./tests/config/nextflow.config -c ./tests/modules/fastawindows/nextflow.config
  tags:
    - fastawindows
  files:
    # Each md5sum belongs to the path entry directly above it.
    - path: output/fastawindows/fw_out/test_freq_windows.tsv
      md5sum: 237d50ac5ec2bef3142020d569fa5765
    - path: output/fastawindows/fw_out/test_mononuc_windows.tsv
      md5sum: a1b4437d0c71d9cfd676de6bda2633f0
    - path: output/fastawindows/fw_out/test_dinuc_windows.tsv
      md5sum: 696a9f2a4b2114dfbd6b414694f56a11
    - path: output/fastawindows/fw_out/test_trinuc_windows.tsv
      md5sum: dfb05b758f0474e937e2d6ba6fe46dae
    - path: output/fastawindows/fw_out/test_tetranuc_windows.tsv
      md5sum: e621537175ee8019360f8b6e8f4330b7