Change Spades input (#1039)

* Change spades module

* correct meta map description

* adjust memory handling

* remove trailing whitespace

* fix hmm input

* Update modules/spades/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Daniel Straub 2021-11-15 12:53:07 +01:00 committed by GitHub
parent 171a2a2dbf
commit 661bdb645e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 115 additions and 32 deletions

View file

@ -19,46 +19,57 @@ process SPADES {
}
input:
tuple val(meta), path(reads)
tuple val(meta), path(illumina), path(pacbio), path(nanopore)
path hmm
output:
tuple val(meta), path('*.scaffolds.fa') , optional:true, emit: scaffolds
tuple val(meta), path('*.contigs.fa') , optional:true, emit: contigs
tuple val(meta), path('*.transcripts.fa') , optional:true, emit: transcripts
tuple val(meta), path('*.gene_clusters.fa'), optional:true, emit: gene_clusters
tuple val(meta), path('*.assembly.gfa') , optional:true, emit: gfa
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds
tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs
tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts
tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters
tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def input_reads = meta.single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
def custom_hmms = params.spades_hmm ? "--custom-hmms $hmm" : ""
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def maxmem = task.memory.toGiga()
def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
"""
spades.py \\
$options.args \\
--threads $task.cpus \\
--memory $maxmem \\
$custom_hmms \\
$input_reads \\
$illumina_reads \\
$pacbio_reads \\
$nanopore_reads \\
-o ./
mv spades.log ${prefix}.spades.log
if [ -f scaffolds.fasta ]; then
mv scaffolds.fasta ${prefix}.scaffolds.fa
gzip -n ${prefix}.scaffolds.fa
fi
if [ -f contigs.fasta ]; then
mv contigs.fasta ${prefix}.contigs.fa
gzip -n ${prefix}.contigs.fa
fi
if [ -f transcripts.fasta ]; then
mv transcripts.fasta ${prefix}.transcripts.fa
gzip -n ${prefix}.transcripts.fa
fi
if [ -f assembly_graph_with_scaffolds.gfa ]; then
mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
gzip -n ${prefix}.assembly.gfa
fi
if [ -f gene_clusters.fasta ]; then
mv gene_clusters.fasta ${prefix}.gene_clusters.fa
gzip -n ${prefix}.gene_clusters.fa
fi
cat <<-END_VERSIONS > versions.yml

View file

@ -20,11 +20,20 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
- illumina:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
List of input FastQ (Illumina or PacBio CCS reads) files
of size 1 and 2 for single-end and paired-end data,
respectively. This input data type is required.
- pacbio:
type: file
description: |
List of input PacBio CLR FastQ files of size 1.
- nanopore:
type: file
description: |
List of input FastQ files of size 1, originating from Oxford Nanopore technology.
- hmm:
type: file
description:
@ -39,26 +48,32 @@ output:
type: file
description: |
Fasta file containing scaffolds
pattern: "*.fa.gz"
- contigs:
type: file
description: |
Fasta file containing contigs
pattern: "*.fa.gz"
- transcripts:
type: file
description: |
Fasta file containing transcripts
pattern: "*.fa.gz"
- gene_clusters:
type: file
description: |
Fasta file containing gene_clusters
pattern: "*.fa.gz"
- gfa:
type: file
description: |
gfa file containing assembly
pattern: "*.gfa.gz"
- log:
type: file
description: |
Spades log file
pattern: "*.log"
- versions:
type: file
description: File containing software versions
@ -67,3 +82,4 @@ output:
authors:
- "@JoseEspinosa"
- "@drpatelh"
- "@d4straub"

View file

@ -2,11 +2,13 @@
nextflow.enable.dsl = 2
include { SPADES } from '../../../modules/spades/main.nf' addParams( spades_hmm: false ,options: ['args': '--rnaviral'] )
include { SPADES } from '../../../modules/spades/main.nf' addParams( options: ['args': '--rnaviral'] )
workflow test_spades_single_end {
input = [ [ id:'test', single_end:true ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
[ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
[],
[]
]
SPADES ( input, [] )
}
@ -14,7 +16,32 @@ workflow test_spades_single_end {
workflow test_spades_paired_end {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
[],
[]
]
SPADES ( input, [] )
}
// Test entry point: runs SPADES on paired-end Illumina reads combined with Nanopore reads.
// The input tuple follows the process declaration order: meta, illumina, pacbio, nanopore.
workflow test_spades_illumina_nanopore {
input = [ [ id:'test', single_end:false ], // meta map
// illumina: two FastQ files (single_end is false, so a pair is supplied)
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
// pacbio: empty list, no PacBio reads are supplied in this test
[],
// nanopore: a single FastQ file
[ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
]
// Second argument is the `hmm` input; an empty list means no custom HMMs are passed.
SPADES ( input, [] )
}
// This test is not ideal: PacBio CCS reads should be passed with -s rather than --pacbio (which is meant for CLR reads).
workflow test_spades_illumina_pacbio {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
[ file(params.test_data['homo_sapiens']['pacbio']['ccs_fq_gz'], checkIfExists: true) ],
[]
]
SPADES ( input, [] )

View file

@ -1,23 +1,52 @@
- name: spades single end
command: nextflow run ./tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config
- name: spades test_spades_single_end
command: nextflow run tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config
tags:
- spades
files:
- path: output/spades/test.assembly.gfa
md5sum: a995d1d413031534180d2b3b715fa921
- path: output/spades/test.contigs.fa
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7
- path: output/spades/test.scaffolds.fa
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7
- path: output/spades/test.assembly.gfa.gz
md5sum: e5eab229363a906954a07df00e2495a6
- path: output/spades/test.contigs.fa.gz
md5sum: 64f6b339872b934138c6efd6baa445f4
- path: output/spades/test.scaffolds.fa.gz
md5sum: 64f6b339872b934138c6efd6baa445f4
- path: output/spades/test.spades.log
- name: spades paired end
command: nextflow run ./tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config
- name: spades test_spades_paired_end
command: nextflow run tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config
tags:
- spades
files:
- path: output/spades/test.assembly.gfa
md5sum: bb053ef4e9250829c980ca17fbdbe3e9
- path: output/spades/test.contigs.fa
md5sum: 4476d409da70d9f7fc2aa8f25bbaf7fd
- path: output/spades/test.assembly.gfa.gz
md5sum: c8614fb69907ae832a1359a054af240f
- path: output/spades/test.contigs.fa.gz
md5sum: eab5165b3cda96c235aaa1388010cb27
- path: output/spades/test.spades.log
- path: output/spades/warnings.log
- name: spades test_spades_illumina_nanopore
command: nextflow run tests/modules/spades -entry test_spades_illumina_nanopore -c tests/config/nextflow.config
tags:
- spades
files:
- path: output/spades/test.assembly.gfa.gz
md5sum: e438534f14e107f005efdd659adeba6a
- path: output/spades/test.contigs.fa.gz
md5sum: 027b0e54bfd8f4bc359e751e094133ef
- path: output/spades/test.scaffolds.fa.gz
md5sum: 027b0e54bfd8f4bc359e751e094133ef
- path: output/spades/test.spades.log
- path: output/spades/warnings.log
- name: spades test_spades_illumina_pacbio
command: nextflow run tests/modules/spades -entry test_spades_illumina_pacbio -c tests/config/nextflow.config
tags:
- spades
files:
- path: output/spades/test.assembly.gfa.gz
md5sum: e12aaf83d8dbfc313339b7636ba43447
- path: output/spades/test.contigs.fa.gz
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
- path: output/spades/test.scaffolds.fa.gz
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
- path: output/spades/test.spades.log
- path: output/spades/warnings.log