Unicycler with long read input (#1041)

* Unicycler with long read input

* tests and md5sums

* remove unstable md5sums

* Update modules/unicycler/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Daniel Straub 2021-11-15 12:48:56 +01:00 committed by GitHub
parent 9475960928
commit 171a2a2dbf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 54 additions and 22 deletions

View file

@ -19,26 +19,30 @@ process UNICYCLER {
}
input:
tuple val(meta), path(reads)
tuple val(meta), path(shortreads), path(longreads)
output:
tuple val(meta), path('*.scaffolds.fa'), emit: scaffolds
tuple val(meta), path('*.assembly.gfa'), emit: gfa
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
tuple val(meta), path('*.scaffolds.fa.gz'), emit: scaffolds
tuple val(meta), path('*.assembly.gfa.gz'), emit: gfa
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def input_reads = meta.single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
def short_reads = shortreads ? ( meta.single_end ? "-s $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" ) : ""
def long_reads = longreads ? "-l $longreads" : ""
"""
unicycler \\
--threads $task.cpus \\
$options.args \\
$input_reads \\
$short_reads \\
$long_reads \\
--out ./
mv assembly.fasta ${prefix}.scaffolds.fa
gzip -n ${prefix}.scaffolds.fa
mv assembly.gfa ${prefix}.assembly.gfa
gzip -n ${prefix}.assembly.gfa
mv unicycler.log ${prefix}.unicycler.log
cat <<-END_VERSIONS > versions.yml

View file

@ -19,11 +19,15 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
- shortreads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
List of input Illumina FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- longreads:
type: file
description: |
List containing a single input FastQ file of PacBio or Nanopore long reads; may be an empty list when assembling short reads only.
output:
- meta:
type: map
@ -37,11 +41,11 @@ output:
- scaffolds:
type: file
description: Fasta file containing scaffolds
pattern: "*.{scaffolds.fa}"
pattern: "*.{scaffolds.fa.gz}"
- gfa:
type: file
description: gfa file containing assembly
pattern: "*.{assembly.gfa}"
pattern: "*.{assembly.gfa.gz}"
- log:
type: file
description: unicycler log file
@ -53,3 +57,4 @@ output:
authors:
- "@JoseEspinosa"
- "@drpatelh"
- "@d4straub"

View file

@ -6,7 +6,8 @@ include { UNICYCLER } from '../../../modules/unicycler/main.nf' addParams( optio
workflow test_unicycler_single_end {
input = [ [ id:'test', single_end:true ], // meta map
[ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
[ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true) ],
[]
]
UNICYCLER ( input )
@ -14,8 +15,19 @@ workflow test_unicycler_single_end {
workflow test_unicycler_paired_end {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
[ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true) ],
[]
]
UNICYCLER ( input )
}
workflow test_unicycler_shortreads_longreads {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true) ],
[ file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
]
UNICYCLER ( input )

View file

@ -1,21 +1,32 @@
- name: unicycler single-end
command: nextflow run ./tests/modules/unicycler -entry test_unicycler_single_end -c tests/config/nextflow.config
- name: unicycler test_unicycler_single_end
command: nextflow run tests/modules/unicycler -entry test_unicycler_single_end -c tests/config/nextflow.config
tags:
- unicycler
files:
- path: output/unicycler/test.scaffolds.fa
- path: output/unicycler/test.assembly.gfa
- path: output/unicycler/test.assembly.gfa.gz
- path: output/unicycler/test.scaffolds.fa.gz
- path: output/unicycler/test.unicycler.log
contains:
- "Assembly complete"
- name: unicycler paired-end
command: nextflow run ./tests/modules/unicycler -entry test_unicycler_paired_end -c tests/config/nextflow.config
- name: unicycler test_unicycler_paired_end
command: nextflow run tests/modules/unicycler -entry test_unicycler_paired_end -c tests/config/nextflow.config
tags:
- unicycler
files:
- path: output/unicycler/test.scaffolds.fa
- path: output/unicycler/test.assembly.gfa
- path: output/unicycler/test.assembly.gfa.gz
- path: output/unicycler/test.scaffolds.fa.gz
- path: output/unicycler/test.unicycler.log
contains:
- "Assembly complete"
- name: unicycler test_unicycler_shortreads_longreads
command: nextflow run tests/modules/unicycler -entry test_unicycler_shortreads_longreads -c tests/config/nextflow.config
tags:
- unicycler
files:
- path: output/unicycler/test.assembly.gfa.gz
- path: output/unicycler/test.scaffolds.fa.gz
- path: output/unicycler/test.unicycler.log
contains:
- "Assembly complete"