Change Spades input (#1039)

* Change spades module

* correct meta map description

* adjust memory handling

* remove trailing whitespace

* fix hmm input

* Update modules/spades/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Daniel Straub 2021-11-15 12:53:07 +01:00 committed by GitHub
parent 171a2a2dbf
commit 661bdb645e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 115 additions and 32 deletions

View file

@ -19,46 +19,57 @@ process SPADES {
} }
input: input:
tuple val(meta), path(reads) tuple val(meta), path(illumina), path(pacbio), path(nanopore)
path hmm path hmm
output: output:
tuple val(meta), path('*.scaffolds.fa') , optional:true, emit: scaffolds tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds
tuple val(meta), path('*.contigs.fa') , optional:true, emit: contigs tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs
tuple val(meta), path('*.transcripts.fa') , optional:true, emit: transcripts tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts
tuple val(meta), path('*.gene_clusters.fa'), optional:true, emit: gene_clusters tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters
tuple val(meta), path('*.assembly.gfa') , optional:true, emit: gfa tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa
tuple val(meta), path('*.log') , emit: log tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def input_reads = meta.single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}" def maxmem = task.memory.toGiga()
def custom_hmms = params.spades_hmm ? "--custom-hmms $hmm" : "" def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
""" """
spades.py \\ spades.py \\
$options.args \\ $options.args \\
--threads $task.cpus \\ --threads $task.cpus \\
--memory $maxmem \\
$custom_hmms \\ $custom_hmms \\
$input_reads \\ $illumina_reads \\
$pacbio_reads \\
$nanopore_reads \\
-o ./ -o ./
mv spades.log ${prefix}.spades.log mv spades.log ${prefix}.spades.log
if [ -f scaffolds.fasta ]; then if [ -f scaffolds.fasta ]; then
mv scaffolds.fasta ${prefix}.scaffolds.fa mv scaffolds.fasta ${prefix}.scaffolds.fa
gzip -n ${prefix}.scaffolds.fa
fi fi
if [ -f contigs.fasta ]; then if [ -f contigs.fasta ]; then
mv contigs.fasta ${prefix}.contigs.fa mv contigs.fasta ${prefix}.contigs.fa
gzip -n ${prefix}.contigs.fa
fi fi
if [ -f transcripts.fasta ]; then if [ -f transcripts.fasta ]; then
mv transcripts.fasta ${prefix}.transcripts.fa mv transcripts.fasta ${prefix}.transcripts.fa
gzip -n ${prefix}.transcripts.fa
fi fi
if [ -f assembly_graph_with_scaffolds.gfa ]; then if [ -f assembly_graph_with_scaffolds.gfa ]; then
mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
gzip -n ${prefix}.assembly.gfa
fi fi
if [ -f gene_clusters.fasta ]; then if [ -f gene_clusters.fasta ]; then
mv gene_clusters.fasta ${prefix}.gene_clusters.fa mv gene_clusters.fasta ${prefix}.gene_clusters.fa
gzip -n ${prefix}.gene_clusters.fa
fi fi
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml

View file

@ -20,11 +20,20 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- reads: - illumina:
type: file type: file
description: | description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data, List of input FastQ (Illumina or PacBio CCS reads) files
respectively. of size 1 and 2 for single-end and paired-end data,
respectively. This input data type is required.
- pacbio:
type: file
description: |
List of input PacBio CLR FastQ files of size 1.
- nanopore:
type: file
description: |
List of input FastQ files of size 1, originating from Oxford Nanopore technology.
- hmm: - hmm:
type: file type: file
description: description:
@ -39,26 +48,32 @@ output:
type: file type: file
description: | description: |
Fasta file containing scaffolds Fasta file containing scaffolds
pattern: "*.fa.gz"
- contigs: - contigs:
type: file type: file
description: | description: |
Fasta file containing contigs Fasta file containing contigs
pattern: "*.fa.gz"
- transcripts: - transcripts:
type: file type: file
description: | description: |
Fasta file containing transcripts Fasta file containing transcripts
pattern: "*.fa.gz"
- gene_clusters: - gene_clusters:
type: file type: file
description: | description: |
Fasta file containing gene_clusters Fasta file containing gene_clusters
pattern: "*.fa.gz"
- gfa: - gfa:
type: file type: file
description: | description: |
gfa file containing assembly gfa file containing assembly
pattern: "*.gfa.gz"
- log: - log:
type: file type: file
description: | description: |
Spades log file Spades log file
pattern: "*.log"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions
@ -67,3 +82,4 @@ output:
authors: authors:
- "@JoseEspinosa" - "@JoseEspinosa"
- "@drpatelh" - "@drpatelh"
- "@d4straub"

View file

@ -2,11 +2,13 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { SPADES } from '../../../modules/spades/main.nf' addParams( spades_hmm: false ,options: ['args': '--rnaviral'] ) include { SPADES } from '../../../modules/spades/main.nf' addParams( options: ['args': '--rnaviral'] )
workflow test_spades_single_end { workflow test_spades_single_end {
input = [ [ id:'test', single_end:true ], // meta map input = [ [ id:'test', single_end:true ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] [ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
[],
[]
] ]
SPADES ( input, [] ) SPADES ( input, [] )
} }
@ -14,7 +16,32 @@ workflow test_spades_single_end {
workflow test_spades_paired_end { workflow test_spades_paired_end {
input = [ [ id:'test', single_end:false ], // meta map input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
[],
[]
]
SPADES ( input, [] )
}
// Test entry point: runs SPADES on paired-end Illumina reads combined with Nanopore reads (sarscov2 test data).
workflow test_spades_illumina_nanopore {
// Tuple layout follows the SPADES input declaration: meta, illumina, pacbio, nanopore.
input = [ [ id:'test', single_end:false ], // meta map
// Paired-end Illumina FastQ files (read 1, read 2)
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
// PacBio slot left empty: no PacBio reads are supplied in this test
[],
// Single Nanopore FastQ file
[ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
]
// Second argument fills the hmm input; the empty list means no custom HMMs are passed.
SPADES ( input, [] )
}
// This is not ideal, because CCS reads should be passed with -s rather than --pacbio
workflow test_spades_illumina_pacbio {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
[ file(params.test_data['homo_sapiens']['pacbio']['ccs_fq_gz'], checkIfExists: true) ],
[]
] ]
SPADES ( input, [] ) SPADES ( input, [] )

View file

@ -1,23 +1,52 @@
- name: spades single end - name: spades test_spades_single_end
command: nextflow run ./tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config command: nextflow run tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config
tags: tags:
- spades - spades
files: files:
- path: output/spades/test.assembly.gfa - path: output/spades/test.assembly.gfa.gz
md5sum: a995d1d413031534180d2b3b715fa921 md5sum: e5eab229363a906954a07df00e2495a6
- path: output/spades/test.contigs.fa - path: output/spades/test.contigs.fa.gz
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7 md5sum: 64f6b339872b934138c6efd6baa445f4
- path: output/spades/test.scaffolds.fa - path: output/spades/test.scaffolds.fa.gz
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7 md5sum: 64f6b339872b934138c6efd6baa445f4
- path: output/spades/test.spades.log - path: output/spades/test.spades.log
- name: spades paired end - name: spades test_spades_paired_end
command: nextflow run ./tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config command: nextflow run tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config
tags: tags:
- spades - spades
files: files:
- path: output/spades/test.assembly.gfa - path: output/spades/test.assembly.gfa.gz
md5sum: bb053ef4e9250829c980ca17fbdbe3e9 md5sum: c8614fb69907ae832a1359a054af240f
- path: output/spades/test.contigs.fa - path: output/spades/test.contigs.fa.gz
md5sum: 4476d409da70d9f7fc2aa8f25bbaf7fd md5sum: eab5165b3cda96c235aaa1388010cb27
- path: output/spades/test.spades.log - path: output/spades/test.spades.log
- path: output/spades/warnings.log
- name: spades test_spades_illumina_nanopore
command: nextflow run tests/modules/spades -entry test_spades_illumina_nanopore -c tests/config/nextflow.config
tags:
- spades
files:
- path: output/spades/test.assembly.gfa.gz
md5sum: e438534f14e107f005efdd659adeba6a
- path: output/spades/test.contigs.fa.gz
md5sum: 027b0e54bfd8f4bc359e751e094133ef
- path: output/spades/test.scaffolds.fa.gz
md5sum: 027b0e54bfd8f4bc359e751e094133ef
- path: output/spades/test.spades.log
- path: output/spades/warnings.log
- name: spades test_spades_illumina_pacbio
command: nextflow run tests/modules/spades -entry test_spades_illumina_pacbio -c tests/config/nextflow.config
tags:
- spades
files:
- path: output/spades/test.assembly.gfa.gz
md5sum: e12aaf83d8dbfc313339b7636ba43447
- path: output/spades/test.contigs.fa.gz
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
- path: output/spades/test.scaffolds.fa.gz
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
- path: output/spades/test.spades.log
- path: output/spades/warnings.log