From 661bdb645eb6dfef3dc3dd1a998bab5ce6f0706e Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:53:07 +0100 Subject: [PATCH] Change Spades input (#1039) * Change spades module * correct meta map description * adjust memory handling * remove trailing whitespace * fix hmm input * Update modules/spades/main.nf Co-authored-by: Harshil Patel --- modules/spades/main.nf | 35 +++++++++++++-------- modules/spades/meta.yml | 22 ++++++++++++-- tests/modules/spades/main.nf | 33 ++++++++++++++++++-- tests/modules/spades/test.yml | 57 ++++++++++++++++++++++++++--------- 4 files changed, 115 insertions(+), 32 deletions(-) diff --git a/modules/spades/main.nf b/modules/spades/main.nf index c21066e2..836efbda 100644 --- a/modules/spades/main.nf +++ b/modules/spades/main.nf @@ -19,46 +19,57 @@ process SPADES { } input: - tuple val(meta), path(reads) + tuple val(meta), path(illumina), path(pacbio), path(nanopore) path hmm output: - tuple val(meta), path('*.scaffolds.fa') , optional:true, emit: scaffolds - tuple val(meta), path('*.contigs.fa') , optional:true, emit: contigs - tuple val(meta), path('*.transcripts.fa') , optional:true, emit: transcripts - tuple val(meta), path('*.gene_clusters.fa'), optional:true, emit: gene_clusters - tuple val(meta), path('*.assembly.gfa') , optional:true, emit: gfa - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds + tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs + tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts + tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters + tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions script: - def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" - def input_reads = meta.single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}" - def custom_hmms = params.spades_hmm ? "--custom-hmms $hmm" : "" + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? "--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" """ spades.py \\ $options.args \\ --threads $task.cpus \\ + --memory $maxmem \\ $custom_hmms \\ - $input_reads \\ + $illumina_reads \\ + $pacbio_reads \\ + $nanopore_reads \\ -o ./ mv spades.log ${prefix}.spades.log if [ -f scaffolds.fasta ]; then mv scaffolds.fasta ${prefix}.scaffolds.fa + gzip -n ${prefix}.scaffolds.fa fi if [ -f contigs.fasta ]; then mv contigs.fasta ${prefix}.contigs.fa + gzip -n ${prefix}.contigs.fa fi if [ -f transcripts.fasta ]; then mv transcripts.fasta ${prefix}.transcripts.fa + gzip -n ${prefix}.transcripts.fa fi if [ -f assembly_graph_with_scaffolds.gfa ]; then mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa + gzip -n ${prefix}.assembly.gfa fi if [ -f gene_clusters.fasta ]; then mv gene_clusters.fasta ${prefix}.gene_clusters.fa + gzip -n ${prefix}.gene_clusters.fa fi cat <<-END_VERSIONS > versions.yml diff --git a/modules/spades/meta.yml b/modules/spades/meta.yml index 3d5943ae..b6878d3d 100644 --- a/modules/spades/meta.yml +++ b/modules/spades/meta.yml @@ -20,11 +20,20 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - reads: + - illumina: type: file description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + List of input FastQ (Illumina or PacBio CCS reads) files + of size 1 and 2 for single-end and paired-end data, + respectively. 
This input data type is required. + - pacbio: + type: file + description: | + List of input PacBio CLR FastQ files of size 1. + - nanopore: + type: file + description: | + List of input FastQ files of size 1, originating from Oxford Nanopore technology. - hmm: type: file description: @@ -39,26 +48,32 @@ output: type: file description: | Fasta file containing scaffolds + pattern: "*.fa.gz" - contigs: type: file description: | Fasta file containing contigs + pattern: "*.fa.gz" - transcripts: type: file description: | Fasta file containing transcripts + pattern: "*.fa.gz" - gene_clusters: type: file description: | Fasta file containing gene_clusters + pattern: "*.fa.gz" - gfa: type: file description: | gfa file containing assembly + pattern: "*.gfa.gz" - log: type: file description: | Spades log file + pattern: "*.log" - versions: type: file description: File containing software versions @@ -67,3 +82,4 @@ output: authors: - "@JoseEspinosa" - "@drpatelh" + - "@d4straub" diff --git a/tests/modules/spades/main.nf b/tests/modules/spades/main.nf index a8518a0e..b09a4266 100644 --- a/tests/modules/spades/main.nf +++ b/tests/modules/spades/main.nf @@ -2,11 +2,13 @@ nextflow.enable.dsl = 2 -include { SPADES } from '../../../modules/spades/main.nf' addParams( spades_hmm: false ,options: ['args': '--rnaviral'] ) +include { SPADES } from '../../../modules/spades/main.nf' addParams( options: ['args': '--rnaviral'] ) workflow test_spades_single_end { input = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ], + [], + [] ] SPADES ( input, [] ) } @@ -14,7 +16,32 @@ workflow test_spades_single_end { workflow test_spades_paired_end { input = [ [ id:'test', single_end:false ], // meta map [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - 
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ], + [], + [] + ] + + SPADES ( input, [] ) +} + +workflow test_spades_illumina_nanopore { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ], + [], + [ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ] + ] + + SPADES ( input, [] ) +} + +// this isn't ideal, because CCS reads should be passed with -s rather than --pacbio +workflow test_spades_illumina_pacbio { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['pacbio']['ccs_fq_gz'], checkIfExists: true) ], + [] ] SPADES ( input, [] ) diff --git a/tests/modules/spades/test.yml b/tests/modules/spades/test.yml index 35beb1a7..a400e79d 100644 --- a/tests/modules/spades/test.yml +++ b/tests/modules/spades/test.yml @@ -1,23 +1,52 @@ -- name: spades single end - command: nextflow run ./tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config +- name: spades test_spades_single_end + command: nextflow run tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config tags: - spades files: - - path: output/spades/test.assembly.gfa - md5sum: a995d1d413031534180d2b3b715fa921 - - path: output/spades/test.contigs.fa - md5sum: 65ba6a517c152dbe219bf4b5b92bdad7 - - path: output/spades/test.scaffolds.fa - md5sum: 65ba6a517c152dbe219bf4b5b92bdad7 + - path: output/spades/test.assembly.gfa.gz + md5sum: e5eab229363a906954a07df00e2495a6 + - path: 
output/spades/test.contigs.fa.gz + md5sum: 64f6b339872b934138c6efd6baa445f4 + - path: output/spades/test.scaffolds.fa.gz + md5sum: 64f6b339872b934138c6efd6baa445f4 - path: output/spades/test.spades.log -- name: spades paired end - command: nextflow run ./tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config +- name: spades test_spades_paired_end + command: nextflow run tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config tags: - spades files: - - path: output/spades/test.assembly.gfa - md5sum: bb053ef4e9250829c980ca17fbdbe3e9 - - path: output/spades/test.contigs.fa - md5sum: 4476d409da70d9f7fc2aa8f25bbaf7fd + - path: output/spades/test.assembly.gfa.gz + md5sum: c8614fb69907ae832a1359a054af240f + - path: output/spades/test.contigs.fa.gz + md5sum: eab5165b3cda96c235aaa1388010cb27 - path: output/spades/test.spades.log + - path: output/spades/warnings.log + +- name: spades test_spades_illumina_nanopore + command: nextflow run tests/modules/spades -entry test_spades_illumina_nanopore -c tests/config/nextflow.config + tags: + - spades + files: + - path: output/spades/test.assembly.gfa.gz + md5sum: e438534f14e107f005efdd659adeba6a + - path: output/spades/test.contigs.fa.gz + md5sum: 027b0e54bfd8f4bc359e751e094133ef + - path: output/spades/test.scaffolds.fa.gz + md5sum: 027b0e54bfd8f4bc359e751e094133ef + - path: output/spades/test.spades.log + - path: output/spades/warnings.log + +- name: spades test_spades_illumina_pacbio + command: nextflow run tests/modules/spades -entry test_spades_illumina_pacbio -c tests/config/nextflow.config + tags: + - spades + files: + - path: output/spades/test.assembly.gfa.gz + md5sum: e12aaf83d8dbfc313339b7636ba43447 + - path: output/spades/test.contigs.fa.gz + md5sum: 78523f66d34ac4d5a4890f353c1a6ec6 + - path: output/spades/test.scaffolds.fa.gz + md5sum: 78523f66d34ac4d5a4890f353c1a6ec6 + - path: output/spades/test.spades.log + - path: output/spades/warnings.log