Dastool update to allow non-gzipped inputs (#1458)

* fix: remove left-over unnecessary code

* Make gzipping optional for DAS_Tool scaffolds2bin

* Add optional unzipping

* Make gunzip optional for DAS_Tool scaffolds2bin

* Apply suggestions from code review

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>

* Update modules/dastool/scaffolds2bin/meta.yml

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
This commit is contained in:
James A. Fellows Yates 2022-03-28 16:29:46 +02:00 committed by GitHub
parent 1f483d9203
commit 240ee4328c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 46 additions and 19 deletions

View file

@ -22,9 +22,10 @@ process DASTOOL_SCAFFOLDS2BIN {
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def file_extension = extension ? extension : "fasta" def file_extension = extension ? extension : "fasta"
def clean_fasta = fasta.toString() - ".gz"
def decompress_fasta = fasta.toString() == clean_fasta ? "" : "gunzip -q -f $fasta"
""" """
gunzip -f *.${file_extension}.gz $decompress_fasta
Fasta_to_Scaffolds2Bin.sh \\ Fasta_to_Scaffolds2Bin.sh \\
$args \\ $args \\

View file

@ -30,14 +30,11 @@ input:
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- fasta: - fasta:
type: file type: file
description: Fasta of list of fasta files recommended to be gathered via with .collect() of bins description: Fasta or list of fasta files of bins, recommended to be gathered with .collect()
pattern: "*.{fa,fas,fasta}" pattern: "*.{fa,fa.gz,fas,fas.gz,fna,fna.gz,fasta,fasta.gz}"
- binner:
type: val
description: Name of the binning software (optional)
- extension: - extension:
type: val type: val
description: Fasta file extension (fa | fas | fasta | ...) description: Fasta file extension (fa | fas | fasta | ...), given without the .gz suffix even if the input is gzipped.
output: output:
- meta: - meta:

View file

@ -2,9 +2,10 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { METABAT2_METABAT2 } from '../../../../modules/metabat2/metabat2/main.nf' include { GUNZIP } from '../../../../modules/gunzip/main.nf'
include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../../../modules/metabat2/jgisummarizebamcontigdepths/main.nf' include { METABAT2_METABAT2 } from '../../../../modules/metabat2/metabat2/main.nf'
include { DASTOOL_SCAFFOLDS2BIN } from '../../../../modules/dastool/scaffolds2bin/main.nf' include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../../../modules/metabat2/jgisummarizebamcontigdepths/main.nf'
include { DASTOOL_SCAFFOLDS2BIN } from '../../../../modules/dastool/scaffolds2bin/main.nf'
workflow test_dastool_scaffolds2bin { workflow test_dastool_scaffolds2bin {
@ -22,4 +23,26 @@ workflow test_dastool_scaffolds2bin {
METABAT2_METABAT2 ( input_metabat2 ) METABAT2_METABAT2 ( input_metabat2 )
DASTOOL_SCAFFOLDS2BIN ( METABAT2_METABAT2.out.fasta.collect(), "fa") DASTOOL_SCAFFOLDS2BIN ( METABAT2_METABAT2.out.fasta.collect(), "fa")
} }
// Test workflow: runs DASTOOL_SCAFFOLDS2BIN on MetaBAT2 bins that were first
// passed through GUNZIP, i.e. it exercises the non-gzipped input path.
workflow test_dastool_scaffolds2bin_ungzipped {
// Input for the depth calculation: meta map, sorted BAM and its BAI index.
input_depth = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['bacteroides_fragilis']['illumina']['test1_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['bacteroides_fragilis']['illumina']['test1_paired_end_sorted_bam_bai'], checkIfExists: true) ]
METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS ( input_depth )
// Attach the same meta map to the gzipped genome FASTA and join it with the
// depth output to build the MetaBAT2 input.
// NOTE(review): presumably the depth channel emits [ meta, depth ] so the join
// matches on the meta map — confirm against the module's output definition.
Channel.fromPath(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
.map { it -> [[ id:'test', single_end:false ], it] }
.join(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.depth)
.set { input_metabat2 }
METABAT2_METABAT2 ( input_metabat2 )
// Decompress the MetaBAT2 fasta output so the module under test receives
// non-gzipped files.
// NOTE(review): unlike test_dastool_scaffolds2bin, no .collect() is applied
// before this call — confirm that is intended.
ch_input_2_scaffolds2bin = GUNZIP( METABAT2_METABAT2.out.fasta ).gunzip
// Second argument is the fasta extension, given without any .gz suffix.
DASTOOL_SCAFFOLDS2BIN ( ch_input_2_scaffolds2bin, "fa")
}

View file

@ -1,14 +1,20 @@
- name: dastool scaffolds2bin test_dastool_scaffolds2bin - name: dastool scaffolds2bin test_dastool_scaffolds2bin
command: nextflow run ./tests/modules/dastool/scaffolds2bin -entry test_dastool_scaffolds2bin -c ./tests/config/nextflow.config -c ./tests/modules/dastool/scaffolds2bin/nextflow.config command: nextflow run tests/modules/dastool/scaffolds2bin -entry test_dastool_scaffolds2bin -c tests/config/nextflow.config
tags: tags:
- dastool - dastool
- dastool/scaffolds2bin - dastool/scaffolds2bin
files: files:
- path: output/dastool/test.tsv - path: output/dastool/test.tsv
md5sum: 6e46c0be14dded7cb13af38f54feea47 md5sum: 6e46c0be14dded7cb13af38f54feea47
- path: output/metabat2/bins/test.1.fa.gz - path: output/dastool/versions.yml
md5sum: 2b297bf557cc3831b800348859331268 md5sum: d0831ed159eb5a1a1565d1d211012ad6
- path: output/metabat2/test.tsv.gz - name: dastool scaffolds2bin test_dastool_scaffolds2bin_ungzipped
md5sum: 619338fa5019e361d5545ce385a6961f command: nextflow run tests/modules/dastool/scaffolds2bin -entry test_dastool_scaffolds2bin_ungzipped -c tests/config/nextflow.config
- path: output/metabat2/test.txt.gz tags:
md5sum: 745a0446af6ef68b930975e9ce5a95d6 - dastool
- dastool/scaffolds2bin
files:
- path: output/dastool/test.tsv
md5sum: 6e46c0be14dded7cb13af38f54feea47
- path: output/dastool/versions.yml
md5sum: da58e477b7f4c16a9ea495ec1a4a4d4f