mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-13 05:13:09 +00:00
Change Spades input (#1039)
* Change spades module
* correct meta map description
* adjust memory handling
* remove trailing whitespace
* fix hmm input
* Update modules/spades/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
parent
171a2a2dbf
commit
661bdb645e
4 changed files with 115 additions and 32 deletions
|
@ -19,46 +19,57 @@ process SPADES {
|
|||
}
|
||||
|
||||
input:
|
||||
tuple val(meta), path(reads)
|
||||
tuple val(meta), path(illumina), path(pacbio), path(nanopore)
|
||||
path hmm
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.scaffolds.fa') , optional:true, emit: scaffolds
|
||||
tuple val(meta), path('*.contigs.fa') , optional:true, emit: contigs
|
||||
tuple val(meta), path('*.transcripts.fa') , optional:true, emit: transcripts
|
||||
tuple val(meta), path('*.gene_clusters.fa'), optional:true, emit: gene_clusters
|
||||
tuple val(meta), path('*.assembly.gfa') , optional:true, emit: gfa
|
||||
tuple val(meta), path('*.log') , emit: log
|
||||
path "versions.yml" , emit: versions
|
||||
tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds
|
||||
tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs
|
||||
tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts
|
||||
tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters
|
||||
tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa
|
||||
tuple val(meta), path('*.log') , emit: log
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
script:
|
||||
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
||||
def input_reads = meta.single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
|
||||
def custom_hmms = params.spades_hmm ? "--custom-hmms $hmm" : ""
|
||||
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
||||
def maxmem = task.memory.toGiga()
|
||||
def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
|
||||
def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
|
||||
def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
|
||||
def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
|
||||
"""
|
||||
spades.py \\
|
||||
$options.args \\
|
||||
--threads $task.cpus \\
|
||||
--memory $maxmem \\
|
||||
$custom_hmms \\
|
||||
$input_reads \\
|
||||
$illumina_reads \\
|
||||
$pacbio_reads \\
|
||||
$nanopore_reads \\
|
||||
-o ./
|
||||
mv spades.log ${prefix}.spades.log
|
||||
|
||||
if [ -f scaffolds.fasta ]; then
|
||||
mv scaffolds.fasta ${prefix}.scaffolds.fa
|
||||
gzip -n ${prefix}.scaffolds.fa
|
||||
fi
|
||||
if [ -f contigs.fasta ]; then
|
||||
mv contigs.fasta ${prefix}.contigs.fa
|
||||
gzip -n ${prefix}.contigs.fa
|
||||
fi
|
||||
if [ -f transcripts.fasta ]; then
|
||||
mv transcripts.fasta ${prefix}.transcripts.fa
|
||||
gzip -n ${prefix}.transcripts.fa
|
||||
fi
|
||||
if [ -f assembly_graph_with_scaffolds.gfa ]; then
|
||||
mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
|
||||
gzip -n ${prefix}.assembly.gfa
|
||||
fi
|
||||
|
||||
if [ -f gene_clusters.fasta ]; then
|
||||
mv gene_clusters.fasta ${prefix}.gene_clusters.fa
|
||||
gzip -n ${prefix}.gene_clusters.fa
|
||||
fi
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -20,11 +20,20 @@ input:
|
|||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
- illumina:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
List of input FastQ (Illumina or PacBio CCS reads) files
|
||||
of size 1 and 2 for single-end and paired-end data,
|
||||
respectively. This input data type is required.
|
||||
- pacbio:
|
||||
type: file
|
||||
description: |
|
||||
List of input PacBio CLR FastQ files of size 1.
|
||||
- nanopore:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1, originating from Oxford Nanopore technology.
|
||||
- hmm:
|
||||
type: file
|
||||
description:
|
||||
|
@ -39,26 +48,32 @@ output:
|
|||
type: file
|
||||
description: |
|
||||
Fasta file containing scaffolds
|
||||
pattern: "*.fa.gz"
|
||||
- contigs:
|
||||
type: file
|
||||
description: |
|
||||
Fasta file containing contigs
|
||||
pattern: "*.fa.gz"
|
||||
- transcripts:
|
||||
type: file
|
||||
description: |
|
||||
Fasta file containing transcripts
|
||||
pattern: "*.fa.gz"
|
||||
- gene_clusters:
|
||||
type: file
|
||||
description: |
|
||||
Fasta file containing gene_clusters
|
||||
pattern: "*.fa.gz"
|
||||
- gfa:
|
||||
type: file
|
||||
description: |
|
||||
gfa file containing assembly
|
||||
pattern: "*.gfa.gz"
|
||||
- log:
|
||||
type: file
|
||||
description: |
|
||||
Spades log file
|
||||
pattern: "*.log"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
|
@ -67,3 +82,4 @@ output:
|
|||
authors:
|
||||
- "@JoseEspinosa"
|
||||
- "@drpatelh"
|
||||
- "@d4straub"
|
||||
|
|
|
@ -2,11 +2,13 @@
|
|||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { SPADES } from '../../../modules/spades/main.nf' addParams( spades_hmm: false ,options: ['args': '--rnaviral'] )
|
||||
include { SPADES } from '../../../modules/spades/main.nf' addParams( options: ['args': '--rnaviral'] )
|
||||
|
||||
workflow test_spades_single_end {
|
||||
input = [ [ id:'test', single_end:true ], // meta map
|
||||
[ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
|
||||
[ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||
[],
|
||||
[]
|
||||
]
|
||||
SPADES ( input, [] )
|
||||
}
|
||||
|
@ -14,7 +16,32 @@ workflow test_spades_single_end {
|
|||
workflow test_spades_paired_end {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
||||
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
|
||||
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||
[],
|
||||
[]
|
||||
]
|
||||
|
||||
SPADES ( input, [] )
|
||||
}
|
||||
|
||||
workflow test_spades_illumina_nanopore {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
||||
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||
[],
|
||||
[ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
|
||||
]
|
||||
|
||||
SPADES ( input, [] )
|
||||
}
|
||||
|
||||
// This isn't ideal: CCS reads should be passed with -s rather than --pacbio
|
||||
workflow test_spades_illumina_pacbio {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true) ],
|
||||
[ file(params.test_data['homo_sapiens']['pacbio']['ccs_fq_gz'], checkIfExists: true) ],
|
||||
[]
|
||||
]
|
||||
|
||||
SPADES ( input, [] )
|
||||
|
|
|
@ -1,23 +1,52 @@
|
|||
- name: spades single end
|
||||
command: nextflow run ./tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config
|
||||
- name: spades test_spades_single_end
|
||||
command: nextflow run tests/modules/spades -entry test_spades_single_end -c tests/config/nextflow.config
|
||||
tags:
|
||||
- spades
|
||||
files:
|
||||
- path: output/spades/test.assembly.gfa
|
||||
md5sum: a995d1d413031534180d2b3b715fa921
|
||||
- path: output/spades/test.contigs.fa
|
||||
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7
|
||||
- path: output/spades/test.scaffolds.fa
|
||||
md5sum: 65ba6a517c152dbe219bf4b5b92bdad7
|
||||
- path: output/spades/test.assembly.gfa.gz
|
||||
md5sum: e5eab229363a906954a07df00e2495a6
|
||||
- path: output/spades/test.contigs.fa.gz
|
||||
md5sum: 64f6b339872b934138c6efd6baa445f4
|
||||
- path: output/spades/test.scaffolds.fa.gz
|
||||
md5sum: 64f6b339872b934138c6efd6baa445f4
|
||||
- path: output/spades/test.spades.log
|
||||
|
||||
- name: spades paired end
|
||||
command: nextflow run ./tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config
|
||||
- name: spades test_spades_paired_end
|
||||
command: nextflow run tests/modules/spades -entry test_spades_paired_end -c tests/config/nextflow.config
|
||||
tags:
|
||||
- spades
|
||||
files:
|
||||
- path: output/spades/test.assembly.gfa
|
||||
md5sum: bb053ef4e9250829c980ca17fbdbe3e9
|
||||
- path: output/spades/test.contigs.fa
|
||||
md5sum: 4476d409da70d9f7fc2aa8f25bbaf7fd
|
||||
- path: output/spades/test.assembly.gfa.gz
|
||||
md5sum: c8614fb69907ae832a1359a054af240f
|
||||
- path: output/spades/test.contigs.fa.gz
|
||||
md5sum: eab5165b3cda96c235aaa1388010cb27
|
||||
- path: output/spades/test.spades.log
|
||||
- path: output/spades/warnings.log
|
||||
|
||||
- name: spades test_spades_illumina_nanopore
|
||||
command: nextflow run tests/modules/spades -entry test_spades_illumina_nanopore -c tests/config/nextflow.config
|
||||
tags:
|
||||
- spades
|
||||
files:
|
||||
- path: output/spades/test.assembly.gfa.gz
|
||||
md5sum: e438534f14e107f005efdd659adeba6a
|
||||
- path: output/spades/test.contigs.fa.gz
|
||||
md5sum: 027b0e54bfd8f4bc359e751e094133ef
|
||||
- path: output/spades/test.scaffolds.fa.gz
|
||||
md5sum: 027b0e54bfd8f4bc359e751e094133ef
|
||||
- path: output/spades/test.spades.log
|
||||
- path: output/spades/warnings.log
|
||||
|
||||
- name: spades test_spades_illumina_pacbio
|
||||
command: nextflow run tests/modules/spades -entry test_spades_illumina_pacbio -c tests/config/nextflow.config
|
||||
tags:
|
||||
- spades
|
||||
files:
|
||||
- path: output/spades/test.assembly.gfa.gz
|
||||
md5sum: e12aaf83d8dbfc313339b7636ba43447
|
||||
- path: output/spades/test.contigs.fa.gz
|
||||
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
|
||||
- path: output/spades/test.scaffolds.fa.gz
|
||||
md5sum: 78523f66d34ac4d5a4890f353c1a6ec6
|
||||
- path: output/spades/test.spades.log
|
||||
- path: output/spades/warnings.log
|
||||
|
|
Loading…
Reference in a new issue