haplotyper-battle-royale/main.nf

195 lines
3.9 KiB
Text
Executable file

#!/usr/bin/env nextflow
// Components pulled in from the local ./modules directory.
// Each statement must use the `include { NAME } from 'path'` form.
include { EFETCH } from './modules/efetch'
include { HAPLINK_VARIANTS } from './modules/haplink/variants'
include { MINIMAP2 } from './modules/minimap2'
include { NANOFILT } from './modules/nanofilt'
include { VIQUAS } from './modules/viquas'
/*
 * Entry workflow.
 *
 * Steps, in order:
 *   1. Collect every file matching *.fastq.gz and key it by its simple name.
 *   2. Fetch the reference for accession NC_036618.1 via EFETCH.
 *   3. Pass the reads through NANOFILT, then MINIMAP2 against the reference.
 *   4. Call variants with HAPLINK_VARIANTS and join them onto the alignments
 *      (join key is the first tuple element).
 *   5. Call haplotypes twice (HAPLINK_RAW_HAPLOTYPES and HAPLINK_ML_HAPLOTYPES),
 *      tag each result with its method name, and feed both to HAPLINK_SEQUENCES.
 *   6. Independently run VIQUAS on the alignments.
 */
workflow {
    // (name, file) pairs for every matching FASTQ
    ch_input = Channel
        .fromPath("*.fastq.gz")
        .map { reads -> tuple(reads.simpleName, reads) }

    // Reference sequence
    EFETCH('NC_036618.1')
    ch_reference = EFETCH.out

    // Read filtering
    NANOFILT(ch_input)
    ch_reads = NANOFILT.out

    // Alignment
    MINIMAP2(ch_reads, ch_reference)
    ch_alignments = MINIMAP2.out

    // Variant calling
    HAPLINK_VARIANTS(ch_alignments, ch_reference)
    ch_variants = HAPLINK_VARIANTS.out

    // Alignments and variant calls for the same key travel together from here on
    ch_haplotype_calling = ch_alignments.join(ch_variants)

    // Haplotype calling, "raw" flavour; insert the method tag as 2nd element
    HAPLINK_RAW_HAPLOTYPES(ch_haplotype_calling, ch_reference)
    ch_raw_haplotypes = HAPLINK_RAW_HAPLOTYPES.out
        .map { sample, calls -> [ sample, 'raw', calls ] }

    // Haplotype calling, "ml" flavour; same tagging scheme
    HAPLINK_ML_HAPLOTYPES(ch_haplotype_calling, ch_reference)
    ch_ml_haplotypes = HAPLINK_ML_HAPLOTYPES.out
        .map { sample, calls -> [ sample, 'ml', calls ] }

    // Both flavours go through the same sequence-generation step
    ch_all_haplotypes = ch_raw_haplotypes.mix(ch_ml_haplotypes)
    HAPLINK_SEQUENCES(ch_all_haplotypes, ch_reference)

    // Alternative caller, run straight from the alignments
    VIQUAS(ch_alignments, ch_reference)
}
/*
 * Runs `shorah amplicon` for one sample.
 *
 * Inputs:
 *   - tuple (prefix, bam): sample key and the file handed to `-b`
 *   - reference:           file handed to `-f`
 * Outputs (both keyed by prefix, both published by copy to results/shorah-amplicon):
 *   - every *.vcf file found in the task directory
 *   - every *support.fas file found in the task directory
 */
process SHORAH_AMPLICON {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Directives have to appear before the input/output/script sections.
    publishDir "results/shorah-amplicon", mode: 'copy'

    input:
    tuple val(prefix), path(bam)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    // The last argument carries no trailing line continuation.
    """
    shorah amplicon \\
        -t ${task.cpus} \\
        -f ${reference} \\
        -b ${bam}
    """
}
/*
 * Runs `shorah shotgun` for one sample.
 *
 * Inputs:
 *   - tuple (prefix, bam): sample key and the file handed to `-b`
 *   - reference:           file handed to `-f`
 * Outputs (both keyed by prefix, both published by copy to results/shorah-shotgun):
 *   - every *.vcf file found in the task directory
 *   - every *support.fas file found in the task directory
 */
process SHORAH_SHOTGUN {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Directives have to appear before the input/output/script sections.
    publishDir "results/shorah-shotgun", mode: 'copy'

    input:
    tuple val(prefix), path(bam)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    // The last argument carries no trailing line continuation.
    """
    shorah shotgun \\
        -t ${task.cpus} \\
        -f ${reference} \\
        -b ${bam}
    """
}
/*
 * Runs `haplink haplotypes` with only a frequency threshold (0.01) and
 * captures its standard output as <prefix>.yaml.
 *
 * Inputs:
 *   - tuple (prefix, bam, bai, vcf): sample key plus the files for that sample.
 *     `bai` is staged into the task directory but never named on the command
 *     line (presumably picked up next to the BAM by the tool; confirm).
 *   - reference: first positional argument of the command
 * Output:
 *   - tuple (prefix, *.yaml), published by copy to results/raw-haplotypes
 */
process HAPLINK_RAW_HAPLOTYPES {
    // Directives have to appear before the input/output/script sections.
    cpus 2
    memory 12.GB
    publishDir "results/raw-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    // JULIA_NUM_THREADS is set to the task's CPU allocation.
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}
/*
 * Runs `haplink haplotypes` with `--simulated-reads` (overlap window 20-8000,
 * frequency threshold 0.01) and captures its standard output as <prefix>.yaml.
 *
 * Inputs:
 *   - tuple (prefix, bam, bai, vcf): sample key plus the files for that sample.
 *     `bai` is staged into the task directory but never named on the command
 *     line (presumably picked up next to the BAM by the tool; confirm).
 *   - reference: first positional argument of the command
 * Output:
 *   - tuple (prefix, *.yaml), published by copy to results/ml-haplotypes
 */
process HAPLINK_ML_HAPLOTYPES {
    // Directives have to appear before the input/output/script sections.
    cpus 8
    memory 12.GB
    publishDir "results/ml-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    // JULIA_NUM_THREADS is set to the task's CPU allocation.
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --simulated-reads \\
        --overlap-min 20 \\
        --overlap-max 8000 \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}
/*
 * Runs `haplink sequences` on one haplotype-call file and captures its
 * standard output as <prefix>.fasta.
 *
 * Inputs:
 *   - tuple (prefix, method, yaml): sample key, method tag, and the file passed
 *     as the second positional argument. `method` is not used by the command;
 *     it only selects the publish directory and is passed through to the output.
 *   - reference: first positional argument of the command
 * Output:
 *   - tuple (prefix, method, *.fasta), published by copy to
 *     results/<method>-haplotypes
 */
process HAPLINK_SEQUENCES {
    // Directives have to appear before the input/output/script sections.
    cpus 1
    memory 6.GB
    // The target directory depends on the `method` input value of each task.
    publishDir "results/${method}-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), val(method), path(yaml)
    path reference

    output:
    tuple val(prefix), val(method), path("*.fasta")

    script:
    // JULIA_NUM_THREADS is set to the task's CPU allocation.
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink sequences \\
        "${reference}" \\
        "${yaml}" \\
        --prefix "${prefix}" \\
        > "${prefix}.fasta"
    """
}