2023-06-08 22:55:57 +00:00
|
|
|
#!/usr/bin/env nextflow
|
|
|
|
|
2023-09-23 20:34:27 +00:00
|
|
|
// Processes provided by module scripts located next to this file.
include { EFETCH } from './modules/efetch'
include { HAPLINK_VARIANTS } from './modules/haplink/variants'
// Fixed: the keyword was misspelled `form`, which is a syntax error.
include { NANOFILT } from './modules/nanofilt'
include { VIQUAS } from './modules/viquas'
|
|
|
|
|
2023-06-08 22:55:57 +00:00
|
|
|
workflow {
    // One (sample name, file) tuple per file matching the glob; the sample
    // name is the file's simpleName.
    ch_input = Channel
        .fromPath("*.fastq.gz")
        .map { fastq -> tuple(fastq.simpleName, fastq) }

    // Reference record for the hard-coded accession, shared by every
    // downstream step.
    EFETCH('NC_036618.1')
    ch_reference = EFETCH.out

    // Filter the reads, then align them against the reference.
    NANOFILT(ch_input)
    ch_reads = NANOFILT.out

    MINIMAP2(ch_reads, ch_reference)
    ch_alignments = MINIMAP2.out

    // Variant calling on the alignments.
    HAPLINK_VARIANTS(ch_alignments, ch_reference)
    ch_variants = HAPLINK_VARIANTS.out

    // Join alignments and variants on their first element so that each
    // haplotype-calling task receives both for the same sample.
    ch_haplotype_calling = ch_alignments.join(ch_variants)

    // Haplotype calling, tagging each result with the method that produced it.
    HAPLINK_RAW_HAPLOTYPES(ch_haplotype_calling, ch_reference)
    ch_raw_haplotypes = HAPLINK_RAW_HAPLOTYPES.out
        .map { row -> [ row[0], 'raw', row[1] ] }

    HAPLINK_ML_HAPLOTYPES(ch_haplotype_calling, ch_reference)
    ch_ml_haplotypes = HAPLINK_ML_HAPLOTYPES.out
        .map { row -> [ row[0], 'ml', row[1] ] }

    // Both haplotype sets go through the same sequence-generation process.
    ch_all_haplotypes = ch_raw_haplotypes.mix(ch_ml_haplotypes)
    HAPLINK_SEQUENCES(ch_all_haplotypes, ch_reference)

    // VIQUAS runs on the same alignments, independently of the HapLink branch.
    VIQUAS(ch_alignments, ch_reference)
}
|
|
|
|
|
|
|
|
/*
 * Align one sample's reads to the reference and emit a sorted, indexed BAM.
 *
 * Inputs:
 *   - tuple (prefix, reads): sample name and its reads file
 *   - reference: reference sequence file passed to minimap2
 * Output:
 *   - tuple (prefix, *.bam, *.bam.bai)
 */
process MINIMAP2 {
    cpus 4
    memory '8.GB'
    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
    // Moved up from between `input:` and `output:`: directives have to be
    // declared before any input/output/script block.
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(reads)
    path reference

    output:
    tuple val(prefix), path("*.bam"), path("*.bam.bai")

    script:
    // minimap2 writes SAM (-a) to stdout; the stream is sorted and then
    // written as BAM named after the sample prefix, which is indexed last.
    """
    minimap2 \\
        -x map-ont \\
        --MD \\
        --eqx \\
        -t ${task.cpus} \\
        -a \\
        "${reference}" \\
        "${reads}" \\
        | samtools sort \\
        | samtools view \\
            -@ ${task.cpus} \\
            -b \\
            -h \\
            -o "${prefix}.bam"
    samtools index "${prefix}.bam"
    """
}
|
|
|
|
|
2023-06-08 23:10:26 +00:00
|
|
|
/*
 * Run `shorah amplicon` on one sample's alignment.
 *
 * Inputs:
 *   - tuple (prefix, bam): sample name and its alignment file
 *   - reference: reference sequence file (passed as -f)
 * Outputs:
 *   - tuple (prefix, *.vcf)
 *   - tuple (prefix, *support.fas)
 */
process SHORAH_AMPLICON {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Moved up from after `output:`: directives have to be declared before
    // any input/output/script block.
    publishDir "results/shorah-amplicon", mode: 'copy'

    input:
    tuple val(prefix), path(bam)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    // Paths are quoted (as in the other processes of this file) and the
    // dangling line continuation after the last argument has been removed.
    // NOTE(review): -t is given task.cpus; confirm that shorah interprets this
    // option as a thread count.
    """
    shorah amplicon \\
        -t ${task.cpus} \\
        -f "${reference}" \\
        -b "${bam}"
    """
}
|
|
|
|
|
2023-06-08 23:12:23 +00:00
|
|
|
/*
 * Run `shorah shotgun` on one sample's alignment.
 *
 * Inputs:
 *   - tuple (prefix, bam): sample name and its alignment file
 *   - reference: reference sequence file (passed as -f)
 * Outputs:
 *   - tuple (prefix, *.vcf)
 *   - tuple (prefix, *support.fas)
 */
process SHORAH_SHOTGUN {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Moved up from after `output:`: directives have to be declared before
    // any input/output/script block.
    publishDir "results/shorah-shotgun", mode: 'copy'

    input:
    tuple val(prefix), path(bam)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    // Paths are quoted (as in the other processes of this file) and the
    // dangling line continuation after the last argument has been removed.
    // NOTE(review): -t is given task.cpus; confirm that shorah interprets this
    // option as a thread count.
    """
    shorah shotgun \\
        -t ${task.cpus} \\
        -f "${reference}" \\
        -b "${bam}"
    """
}
|
|
|
|
|
2023-06-08 22:55:57 +00:00
|
|
|
/*
 * Call haplotypes with `haplink haplotypes` using only the --frequency
 * option (no simulated-read options).
 *
 * Inputs:
 *   - tuple (prefix, bam, bai, vcf): sample name, alignment, its index and
 *     the sample's variant calls; the index is staged but not passed on the
 *     command line
 *   - reference: reference sequence file
 * Output:
 *   - tuple (prefix, *.yaml): haplink's stdout captured as <prefix>.yaml
 */
process HAPLINK_RAW_HAPLOTYPES {
    cpus 2
    memory '12.GB'
    // Moved up from after `output:`: directives have to be declared before
    // any input/output/script block.
    publishDir "results/raw-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    // JULIA_NUM_THREADS is set to the task's CPU allocation before haplink runs.
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}
|
|
|
|
|
|
|
|
/*
 * Call haplotypes with `haplink haplotypes` in its simulated-reads mode
 * (--simulated-reads with overlap bounds of 20 and 8000).
 *
 * Inputs:
 *   - tuple (prefix, bam, bai, vcf): sample name, alignment, its index and
 *     the sample's variant calls; the index is staged but not passed on the
 *     command line
 *   - reference: reference sequence file
 * Output:
 *   - tuple (prefix, *.yaml): haplink's stdout captured as <prefix>.yaml
 */
process HAPLINK_ML_HAPLOTYPES {
    cpus 8
    memory '12.GB'
    // Moved up from after `output:`: directives have to be declared before
    // any input/output/script block.
    publishDir "results/ml-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    // JULIA_NUM_THREADS is set to the task's CPU allocation before haplink runs.
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --simulated-reads \\
        --overlap-min 20 \\
        --overlap-max 8000 \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}
|
|
|
|
|
|
|
|
/*
 * Turn a haplotype YAML file into FASTA with `haplink sequences`.
 *
 * Inputs:
 *   - tuple (prefix, method, yaml): sample name, the label of the calling
 *     method (the workflow passes 'raw' or 'ml') and the haplotype file
 *   - reference: reference sequence file
 * Output:
 *   - tuple (prefix, method, *.fasta): haplink's stdout captured as
 *     <prefix>.fasta
 */
process HAPLINK_SEQUENCES {
    cpus 1
    memory '6.GB'
    // Moved up from after `output:`: directives have to be declared before
    // any input/output/script block. The target directory depends on the
    // `method` input, so results of different methods are published apart.
    publishDir "results/${method}-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), val(method), path(yaml)
    path reference

    output:
    tuple val(prefix), val(method), path("*.fasta")

    script:
    // JULIA_NUM_THREADS is set to the task's CPU allocation before haplink runs.
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink sequences \\
        "${reference}" \\
        "${yaml}" \\
        --prefix "${prefix}" \\
        > "${prefix}.fasta"
    """
}
|