Dastool update to allow non-gzipped inputs (#1458)

* fix: remove left-over unnecessary code

* Make gzipping optional for DAS_Tool scaffolds2bin

* Add optional unzipping

* Make gunzip optional for DAS_Tool scaffolds2bin

* Apply suggestions from code review

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>

* Update modules/dastool/scaffolds2bin/meta.yml

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
This commit is contained in:
James A. Fellows Yates 2022-03-28 16:29:46 +02:00 committed by GitHub
parent 1f483d9203
commit 240ee4328c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 46 additions and 19 deletions

View file

@ -22,9 +22,10 @@ process DASTOOL_SCAFFOLDS2BIN {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def file_extension = extension ? extension : "fasta"
def clean_fasta = fasta.toString() - ".gz"
def decompress_fasta = fasta.toString() == clean_fasta ? "" : "gunzip -q -f $fasta"
"""
gunzip -f *.${file_extension}.gz
$decompress_fasta
Fasta_to_Scaffolds2Bin.sh \\
$args \\

View file

@ -30,14 +30,11 @@ input:
e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: Fasta of list of fasta files recommended to be gathered via with .collect() of bins
pattern: "*.{fa,fas,fasta}"
- binner:
type: val
description: Name of the binning software (optional)
description: Fasta or list of fasta files of bins, recommended to be gathered via .collect()
pattern: "*.{fa,fa.gz,fas,fas.gz,fna,fna.gz,fasta,fasta.gz}"
- extension:
type: val
description: Fasta file extension (fa | fas | fasta | ...)
description: Fasta file extension (fa | fas | fasta | ...), given without the .gz suffix even if the input is gzipped.
output:
- meta:

View file

@ -2,9 +2,10 @@
nextflow.enable.dsl = 2
include { METABAT2_METABAT2 } from '../../../../modules/metabat2/metabat2/main.nf'
include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../../../modules/metabat2/jgisummarizebamcontigdepths/main.nf'
include { DASTOOL_SCAFFOLDS2BIN } from '../../../../modules/dastool/scaffolds2bin/main.nf'
include { GUNZIP } from '../../../../modules/gunzip/main.nf'
include { METABAT2_METABAT2 } from '../../../../modules/metabat2/metabat2/main.nf'
include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../../../modules/metabat2/jgisummarizebamcontigdepths/main.nf'
include { DASTOOL_SCAFFOLDS2BIN } from '../../../../modules/dastool/scaffolds2bin/main.nf'
workflow test_dastool_scaffolds2bin {
@ -23,3 +24,25 @@ workflow test_dastool_scaffolds2bin {
DASTOOL_SCAFFOLDS2BIN ( METABAT2_METABAT2.out.fasta.collect(), "fa")
}
// Test workflow: runs DASTOOL_SCAFFOLDS2BIN on METABAT2_METABAT2 fasta output that has
// first been passed through the GUNZIP module, instead of on the direct METABAT2 output.
workflow test_dastool_scaffolds2bin_ungzipped {
// Depth-calculation input: meta map, sorted BAM and its BAI index from the test dataset.
input_depth = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['bacteroides_fragilis']['illumina']['test1_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['bacteroides_fragilis']['illumina']['test1_paired_end_sorted_bam_bai'], checkIfExists: true) ]
METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS ( input_depth )
// Build the METABAT2 input: attach the same meta map to the genome file, then join it
// with the depth channel (presumably matched on the shared meta map — confirm against
// the Nextflow `join` defaults).
Channel.fromPath(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
.map { it -> [[ id:'test', single_end:false ], it] }
.join(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.depth)
.set { input_metabat2 }
METABAT2_METABAT2 ( input_metabat2 )
// TODO test unzipped input files
// NOTE(review): the GUNZIP call below already feeds decompressed bins into
// DASTOOL_SCAFFOLDS2BIN, so the TODO above looks stale — confirm and remove.
ch_input_2_scaffolds2bin = GUNZIP( METABAT2_METABAT2.out.fasta ).gunzip
// Second argument is the fasta file extension of the bins, given without any .gz suffix.
DASTOOL_SCAFFOLDS2BIN ( ch_input_2_scaffolds2bin, "fa")
}

View file

@ -1,14 +1,20 @@
- name: dastool scaffolds2bin test_dastool_scaffolds2bin
command: nextflow run ./tests/modules/dastool/scaffolds2bin -entry test_dastool_scaffolds2bin -c ./tests/config/nextflow.config -c ./tests/modules/dastool/scaffolds2bin/nextflow.config
command: nextflow run tests/modules/dastool/scaffolds2bin -entry test_dastool_scaffolds2bin -c tests/config/nextflow.config
tags:
- dastool
- dastool/scaffolds2bin
files:
- path: output/dastool/test.tsv
md5sum: 6e46c0be14dded7cb13af38f54feea47
- path: output/metabat2/bins/test.1.fa.gz
md5sum: 2b297bf557cc3831b800348859331268
- path: output/metabat2/test.tsv.gz
md5sum: 619338fa5019e361d5545ce385a6961f
- path: output/metabat2/test.txt.gz
md5sum: 745a0446af6ef68b930975e9ce5a95d6
- path: output/dastool/versions.yml
md5sum: d0831ed159eb5a1a1565d1d211012ad6
- name: dastool scaffolds2bin test_dastool_scaffolds2bin_ungzipped
command: nextflow run tests/modules/dastool/scaffolds2bin -entry test_dastool_scaffolds2bin_ungzipped -c tests/config/nextflow.config
tags:
- dastool
- dastool/scaffolds2bin
files:
- path: output/dastool/test.tsv
md5sum: 6e46c0be14dded7cb13af38f54feea47
- path: output/dastool/versions.yml
md5sum: da58e477b7f4c16a9ea495ec1a4a4d4f