Unicycler with long read input (#1041)

* Unicycler with long read input

* Add tests and md5sums

* Remove unstable md5sums

* Update modules/unicycler/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Daniel Straub 2021-11-15 12:48:56 +01:00 committed by GitHub
parent 9475960928
commit 171a2a2dbf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 54 additions and 22 deletions

View file

@ -19,26 +19,30 @@ process UNICYCLER {
} }
input: input:
tuple val(meta), path(reads) tuple val(meta), path(shortreads), path(longreads)
output: output:
tuple val(meta), path('*.scaffolds.fa'), emit: scaffolds tuple val(meta), path('*.scaffolds.fa.gz'), emit: scaffolds
tuple val(meta), path('*.assembly.gfa'), emit: gfa tuple val(meta), path('*.assembly.gfa.gz'), emit: gfa
tuple val(meta), path('*.log') , emit: log tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def input_reads = meta.single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}" def short_reads = shortreads ? ( meta.single_end ? "-s $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" ) : ""
def long_reads = longreads ? "-l $longreads" : ""
""" """
unicycler \\ unicycler \\
--threads $task.cpus \\ --threads $task.cpus \\
$options.args \\ $options.args \\
$input_reads \\ $short_reads \\
$long_reads \\
--out ./ --out ./
mv assembly.fasta ${prefix}.scaffolds.fa mv assembly.fasta ${prefix}.scaffolds.fa
gzip -n ${prefix}.scaffolds.fa
mv assembly.gfa ${prefix}.assembly.gfa mv assembly.gfa ${prefix}.assembly.gfa
gzip -n ${prefix}.assembly.gfa
mv unicycler.log ${prefix}.unicycler.log mv unicycler.log ${prefix}.unicycler.log
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml

View file

@ -19,11 +19,15 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- reads: - shortreads:
type: file type: file
description: | description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data, List of input Illumina FastQ files of size 1 and 2 for single-end and paired-end data,
respectively. respectively.
- longreads:
type: file
description: |
List of input FastQ files of size 1 containing PacBio or Nanopore long reads.
output: output:
- meta: - meta:
type: map type: map
@ -37,11 +41,11 @@ output:
- scaffolds: - scaffolds:
type: file type: file
description: Fasta file containing scaffolds description: Fasta file containing scaffolds
pattern: "*.{scaffolds.fa}" pattern: "*.{scaffolds.fa.gz}"
- gfa: - gfa:
type: file type: file
description: gfa file containing assembly description: gfa file containing assembly
pattern: "*.{assembly.gfa}" pattern: "*.{assembly.gfa.gz}"
- log: - log:
type: file type: file
description: unicycler log file description: unicycler log file
@ -53,3 +57,4 @@ output:
authors: authors:
- "@JoseEspinosa" - "@JoseEspinosa"
- "@drpatelh" - "@drpatelh"
- "@d4straub"

View file

@ -6,7 +6,8 @@ include { UNICYCLER } from '../../../modules/unicycler/main.nf' addParams( optio
workflow test_unicycler_single_end { workflow test_unicycler_single_end {
input = [ [ id:'test', single_end:true ], // meta map input = [ [ id:'test', single_end:true ], // meta map
[ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ] [ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true) ],
[]
] ]
UNICYCLER ( input ) UNICYCLER ( input )
@ -14,8 +15,19 @@ workflow test_unicycler_single_end {
workflow test_unicycler_paired_end { workflow test_unicycler_paired_end {
input = [ [ id:'test', single_end:false ], // meta map input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), [ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true) ],
[]
]
UNICYCLER ( input )
}
workflow test_unicycler_shortreads_longreads {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true) ],
[ file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
] ]
UNICYCLER ( input ) UNICYCLER ( input )

View file

@ -1,21 +1,32 @@
- name: unicycler single-end - name: unicycler test_unicycler_single_end
command: nextflow run ./tests/modules/unicycler -entry test_unicycler_single_end -c tests/config/nextflow.config command: nextflow run tests/modules/unicycler -entry test_unicycler_single_end -c tests/config/nextflow.config
tags: tags:
- unicycler - unicycler
files: files:
- path: output/unicycler/test.scaffolds.fa - path: output/unicycler/test.assembly.gfa.gz
- path: output/unicycler/test.assembly.gfa - path: output/unicycler/test.scaffolds.fa.gz
- path: output/unicycler/test.unicycler.log - path: output/unicycler/test.unicycler.log
contains: contains:
- "Assembly complete" - "Assembly complete"
- name: unicycler paired-end - name: unicycler test_unicycler_paired_end
command: nextflow run ./tests/modules/unicycler -entry test_unicycler_paired_end -c tests/config/nextflow.config command: nextflow run tests/modules/unicycler -entry test_unicycler_paired_end -c tests/config/nextflow.config
tags: tags:
- unicycler - unicycler
files: files:
- path: output/unicycler/test.scaffolds.fa - path: output/unicycler/test.assembly.gfa.gz
- path: output/unicycler/test.assembly.gfa - path: output/unicycler/test.scaffolds.fa.gz
- path: output/unicycler/test.unicycler.log
contains:
- "Assembly complete"
- name: unicycler test_unicycler_shortreads_longreads
command: nextflow run tests/modules/unicycler -entry test_unicycler_shortreads_longreads -c tests/config/nextflow.config
tags:
- unicycler
files:
- path: output/unicycler/test.assembly.gfa.gz
- path: output/unicycler/test.scaffolds.fa.gz
- path: output/unicycler/test.unicycler.log - path: output/unicycler/test.unicycler.log
contains: contains:
- "Assembly complete" - "Assembly complete"