mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-10 20:23:10 +00:00
Change Spades input (#1039)
* Change spades module * correct meta map description * adjust memory handling * remove trailing whitespace * fix hmm input * Update modules/spades/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
parent
171a2a2dbf
commit
661bdb645e
4 changed files with 115 additions and 32 deletions
|
@ -19,46 +19,57 @@ process SPADES {
|
||||||
}
|
}
|
||||||
|
|
||||||
input:
|
input:
|
||||||
tuple val(meta), path(reads)
|
tuple val(meta), path(illumina), path(pacbio), path(nanopore)
|
||||||
path hmm
|
path hmm
|
||||||
|
|
||||||
output:
|
output:
|
||||||
tuple val(meta), path('*.scaffolds.fa') , optional:true, emit: scaffolds
|
tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds
|
||||||
tuple val(meta), path('*.contigs.fa') , optional:true, emit: contigs
|
tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs
|
||||||
tuple val(meta), path('*.transcripts.fa') , optional:true, emit: transcripts
|
tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts
|
||||||
tuple val(meta), path('*.gene_clusters.fa'), optional:true, emit: gene_clusters
|
tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters
|
||||||
tuple val(meta), path('*.assembly.gfa') , optional:true, emit: gfa
|
tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa
|
||||||
tuple val(meta), path('*.log') , emit: log
|
tuple val(meta), path('*.log') , emit: log
|
||||||
path "versions.yml" , emit: versions
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
script:
|
script:
|
||||||
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
||||||
def input_reads = meta.single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
|
def maxmem = task.memory.toGiga()
|
||||||
def custom_hmms = params.spades_hmm ? "--custom-hmms $hmm" : ""
|
def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
|
||||||
|
def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
|
||||||
|
def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
|
||||||
|
def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
|
||||||
"""
|
"""
|
||||||
spades.py \\
|
spades.py \\
|
||||||
$options.args \\
|
$options.args \\
|
||||||
--threads $task.cpus \\
|
--threads $task.cpus \\
|
||||||
|
--memory $maxmem \\
|
||||||
$custom_hmms \\
|
$custom_hmms \\
|
||||||
$input_reads \\
|
$illumina_reads \\
|
||||||
|
$pacbio_reads \\
|
||||||
|
$nanopore_reads \\
|
||||||
-o ./
|
-o ./
|
||||||
mv spades.log ${prefix}.spades.log
|
mv spades.log ${prefix}.spades.log
|
||||||
|
|
||||||
if [ -f scaffolds.fasta ]; then
|
if [ -f scaffolds.fasta ]; then
|
||||||
mv scaffolds.fasta ${prefix}.scaffolds.fa
|
mv scaffolds.fasta ${prefix}.scaffolds.fa
|
||||||
|
gzip -n ${prefix}.scaffolds.fa
|
||||||
fi
|
fi
|
||||||
if [ -f contigs.fasta ]; then
|
if [ -f contigs.fasta ]; then
|
||||||
mv contigs.fasta ${prefix}.contigs.fa
|
mv contigs.fasta ${prefix}.contigs.fa
|
||||||
|
gzip -n ${prefix}.contigs.fa
|
||||||
fi
|
fi
|
||||||
if [ -f transcripts.fasta ]; then
|
if [ -f transcripts.fasta ]; then
|
||||||
mv transcripts.fasta ${prefix}.transcripts.fa
|
mv transcripts.fasta ${prefix}.transcripts.fa
|
||||||
|
gzip -n ${prefix}.transcripts.fa
|
||||||
fi
|
fi
|
||||||
if [ -f assembly_graph_with_scaffolds.gfa ]; then
|
if [ -f assembly_graph_with_scaffolds.gfa ]; then
|
||||||
mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
|
mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
|
||||||
|
gzip -n ${prefix}.assembly.gfa
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -f gene_clusters.fasta ]; then
|
if [ -f gene_clusters.fasta ]; then
|
||||||
mv gene_clusters.fasta ${prefix}.gene_clusters.fa
|
mv gene_clusters.fasta ${prefix}.gene_clusters.fa
|
||||||
|
gzip -n ${prefix}.gene_clusters.fa
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
|
|
@ -20,11 +20,20 @@ input:
|
||||||
description: |
|
description: |
|
||||||
Groovy Map containing sample information
|
Groovy Map containing sample information
|
||||||
e.g. [ id:'test', single_end:false ]
|
e.g. [ id:'test', single_end:false ]
|
||||||
- reads:
|
- illumina:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
List of input FastQ (Illumina or PacBio CCS reads) files
|
||||||
respectively.
|
of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively. This input data type is required.
|
||||||
|
- pacbio:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input PacBio CLR FastQ files of size 1.
|
||||||
|
- nanopore:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1, originating from Oxford Nanopore technology.
|
||||||
- hmm:
|
- hmm:
|
||||||
type: file
|
type: file
|
||||||
description:
|
description:
|
||||||
|
@ -39,26 +48,32 @@ output:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Fasta file containing scaffolds
|
Fasta file containing scaffolds
|
||||||
|
pattern: "*.fa.gz"
|
||||||
- contigs:
|
- contigs:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Fasta file containing contigs
|
Fasta file containing contigs
|
||||||
|
pattern: "*.fa.gz"
|
||||||
- transcripts:
|
- transcripts:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Fasta file containing transcripts
|
Fasta file containing transcripts
|
||||||
|
pattern: "*.fa.gz"
|
||||||
- gene_clusters:
|
- gene_clusters:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Fasta file containing gene_clusters
|
Fasta file containing gene_clusters
|
||||||
|
pattern: "*.fa.gz"
|
||||||
- gfa:
|
- gfa:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
gfa file containing assembly
|
gfa file containing assembly
|
||||||
|
pattern: "*.gfa.gz"
|
||||||
- log:
|
- log:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Spades log file
|
Spades log file
|
||||||
|
pattern: "*.log"
|
||||||
- versions:
|
- versions:
|
||||||
type: file
|
type: file
|
||||||
description: File containing software versions
|
description: File containing software versions
|
||||||
|
@ -67,3 +82,4 @@ output:
|
||||||
authors:
|
authors:
|
||||||
- "@JoseEspinosa"
|
- "@JoseEspinosa"
|
||||||
- "@drpatelh"
|
- "@drpatelh"
|
||||||
|
- "@d4straub"
|
||||||
|
|
|
@ -2,11 +2,13 @@
|
||||||
|
|
||||||
nextflow.enable.dsl = 2
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
include { SPADES } from '../../../modules/spades/main.nf' addParams( spades_hmm: false ,options: ['args': '--rnaviral'] )
|
include { SPADES } from '../../../modules/spades/main.nf' addParams( options: ['args': '--rnaviral'] )
|
||||||
|
|
||||||
workflow test_spades_single_end {
|
workflow test_spades_single_end {
|
||||||
input = [ [ id:'test', single_end:true ], // meta map
|
input = [ [ id:'test', single_end:true ], // meta map
|
||||||
[ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
|
[ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||||
|
[],
|
||||||
|
[]
|
||||||
]
|
]
|
||||||
SPADES ( input, [] )
|
SPADES ( input, [] )
|
||||||
}
|
}
|
||||||
|
@ -14,7 +16,32 @@ workflow test_spades_single_end {
|
||||||
workflow test_spades_paired_end {
|
workflow test_spades_paired_end {
|
||||||
input = [ [ id:'test', single_end:false ], // meta map
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
||||||
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
|
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||||
|
[],
|
||||||
|
[]
|
||||||
|
]
|
||||||
|
|
||||||
|
SPADES ( input, [] )
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow test_spades_illumina_nanopore {
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||||
|
[],
|
||||||
|
[ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
|
||||||
|
]
|
||||||
|
|
||||||
|
SPADES ( input, [] )
|
||||||
|
}
|
||||||
|
|
||||||
|
// This test isn't ideal: CCS reads should rather be passed with -s instead of --pacbio
|
||||||
|
workflow test_spades_illumina_pacbio {
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
[ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
||||||
|
file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||||
|
[ file(params.test_data['homo_sapiens']['pacbio']['ccs_fq_gz'], checkIfExists: true) ],
|
||||||
|
[]
|
||||||
]
|
]
|
||||||
|
|
||||||
SPADES ( input, [] )
|
SPADES ( input, [] )
|
||||||
|
|
|
@ -1,23 +1,52 @@
|
||||||
- name: spades single end
|
- name: spades test_spades_single_end
|
||||||
command: nextflow run ./tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config
|
command: nextflow run tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- spades
|
- spades
|
||||||
files:
|
files:
|
||||||
- path: output/spades/test.assembly.gfa
|
- path: output/spades/test.assembly.gfa.gz
|
||||||
md5sum: a995d1d413031534180d2b3b715fa921
|
md5sum: e5eab229363a906954a07df00e2495a6
|
||||||
- path: output/spades/test.contigs.fa
|
- path: output/spades/test.contigs.fa.gz
|
||||||
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7
|
md5sum: 64f6b339872b934138c6efd6baa445f4
|
||||||
- path: output/spades/test.scaffolds.fa
|
- path: output/spades/test.scaffolds.fa.gz
|
||||||
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7
|
md5sum: 64f6b339872b934138c6efd6baa445f4
|
||||||
- path: output/spades/test.spades.log
|
- path: output/spades/test.spades.log
|
||||||
|
|
||||||
- name: spades paired end
|
- name: spades test_spades_paired_end
|
||||||
command: nextflow run ./tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config
|
command: nextflow run tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- spades
|
- spades
|
||||||
files:
|
files:
|
||||||
- path: output/spades/test.assembly.gfa
|
- path: output/spades/test.assembly.gfa.gz
|
||||||
md5sum: bb053ef4e9250829c980ca17fbdbe3e9
|
md5sum: c8614fb69907ae832a1359a054af240f
|
||||||
- path: output/spades/test.contigs.fa
|
- path: output/spades/test.contigs.fa.gz
|
||||||
md5sum: 4476d409da70d9f7fc2aa8f25bbaf7fd
|
md5sum: eab5165b3cda96c235aaa1388010cb27
|
||||||
- path: output/spades/test.spades.log
|
- path: output/spades/test.spades.log
|
||||||
|
- path: output/spades/warnings.log
|
||||||
|
|
||||||
|
- name: spades test_spades_illumina_nanopore
|
||||||
|
command: nextflow run tests/modules/spades -entry test_spades_illumina_nanopore -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- spades
|
||||||
|
files:
|
||||||
|
- path: output/spades/test.assembly.gfa.gz
|
||||||
|
md5sum: e438534f14e107f005efdd659adeba6a
|
||||||
|
- path: output/spades/test.contigs.fa.gz
|
||||||
|
md5sum: 027b0e54bfd8f4bc359e751e094133ef
|
||||||
|
- path: output/spades/test.scaffolds.fa.gz
|
||||||
|
md5sum: 027b0e54bfd8f4bc359e751e094133ef
|
||||||
|
- path: output/spades/test.spades.log
|
||||||
|
- path: output/spades/warnings.log
|
||||||
|
|
||||||
|
- name: spades test_spades_illumina_pacbio
|
||||||
|
command: nextflow run tests/modules/spades -entry test_spades_illumina_pacbio -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- spades
|
||||||
|
files:
|
||||||
|
- path: output/spades/test.assembly.gfa.gz
|
||||||
|
md5sum: e12aaf83d8dbfc313339b7636ba43447
|
||||||
|
- path: output/spades/test.contigs.fa.gz
|
||||||
|
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
|
||||||
|
- path: output/spades/test.scaffolds.fa.gz
|
||||||
|
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
|
||||||
|
- path: output/spades/test.spades.log
|
||||||
|
- path: output/spades/warnings.log
|
||||||
|
|
Loading…
Reference in a new issue