#!/usr/bin/env nextflow

/*
 * Haplotype-calling pipeline.
 *
 * Reads every `*.fastq.gz` file in the launch directory, filters and aligns
 * the reads against a downloaded reference, calls variants, and then calls
 * haplotypes with two HapLink configurations ("raw" and "ml") before
 * converting the haplotype calls to FASTA. The alignments are also passed to
 * ViQuaS.
 */

include { EFETCH } from './modules/efetch'
include { HAPLINK_VARIANTS } from './modules/haplink/variants'
include { MINIMAP2 } from './modules/minimap2'
include { NANOFILT } from './modules/nanofilt'
include { SHORAH_AMPLICON } from './modules/shorah/amplicon'
include { VIQUAS } from './modules/viquas'

workflow {
    // One (sample prefix, reads file) tuple per FASTQ in the launch directory.
    Channel
        .fromPath("*.fastq.gz")
        .map { fastq -> tuple(fastq.simpleName, fastq) }
        .set { ch_input }

    // Reference record fetched by accession; shared by every downstream step.
    EFETCH('NC_036618.1')
    EFETCH
        .out
        .set { ch_reference }

    NANOFILT( ch_input )
    NANOFILT
        .out
        .set { ch_reads }

    MINIMAP2( ch_reads, ch_reference )
    MINIMAP2
        .out
        .set { ch_alignments }

    HAPLINK_VARIANTS( ch_alignments, ch_reference )
    HAPLINK_VARIANTS
        .out
        .set { ch_variants }

    // Pair each sample's alignment with its variant calls, keyed on the first
    // tuple element.
    // NOTE(review): the haplotype processes expect (prefix, bam, bai, vcf), so
    // this presumes MINIMAP2 emits (prefix, bam, bai) and HAPLINK_VARIANTS
    // emits (prefix, vcf) -- confirm against the module definitions.
    ch_alignments
        .join( ch_variants )
        .set { ch_haplotype_calling }

    // Tag each haplotype file with the method that produced it so that
    // HAPLINK_SEQUENCES can publish into a per-method directory.
    HAPLINK_RAW_HAPLOTYPES( ch_haplotype_calling, ch_reference )
    HAPLINK_RAW_HAPLOTYPES
        .out
        .map { [ it[0], 'raw', it[1] ] }
        .set { ch_raw_haplotypes }

    HAPLINK_ML_HAPLOTYPES( ch_haplotype_calling, ch_reference )
    HAPLINK_ML_HAPLOTYPES
        .out
        .map { [ it[0], 'ml', it[1] ] }
        .set { ch_ml_haplotypes }

    ch_raw_haplotypes
        .mix( ch_ml_haplotypes )
        .set { ch_all_haplotypes }

    HAPLINK_SEQUENCES( ch_all_haplotypes, ch_reference )

    VIQUAS( ch_alignments, ch_reference )
}

/*
 * Run `shorah shotgun` on one alignment.
 *
 * Input:  (prefix, bam) tuple and the reference file.
 * Output: (prefix, *.vcf) and (prefix, *support.fas), copied to
 *         results/shorah-shotgun.
 *
 * This process is not called from the workflow block in this file.
 */
process SHORAH_SHOTGUN {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    publishDir "results/shorah-shotgun", mode: 'copy'

    input:
    tuple val(prefix), path(bam)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    """
    shorah shotgun \\
        -t ${task.cpus} \\
        -f ${reference} \\
        -b ${bam}
    """
}

/*
 * Call haplotypes with `haplink haplotypes` using only the `--frequency`
 * threshold.
 *
 * Input:  (prefix, bam, bai, vcf) tuple and the reference file. The `bai` is
 *         staged but not referenced by the command line.
 * Output: (prefix, <prefix>.yaml), copied to results/raw-haplotypes.
 */
process HAPLINK_RAW_HAPLOTYPES {
    cpus 2
    memory '12.GB'
    publishDir "results/raw-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}

/*
 * Call haplotypes with `haplink haplotypes --simulated-reads`, bounded by the
 * `--overlap-min` / `--overlap-max` values given below.
 *
 * Input:  (prefix, bam, bai, vcf) tuple and the reference file. The `bai` is
 *         staged but not referenced by the command line.
 * Output: (prefix, <prefix>.yaml), copied to results/ml-haplotypes.
 */
process HAPLINK_ML_HAPLOTYPES {
    cpus 8
    memory '12.GB'
    publishDir "results/ml-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --simulated-reads \\
        --overlap-min 20 \\
        --overlap-max 8000 \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}

/*
 * Convert a haplotype YAML file to FASTA with `haplink sequences`.
 *
 * Input:  (prefix, method, yaml) tuple and the reference file. `method` is the
 *         tag added in the workflow ('raw' or 'ml') and selects the publish
 *         directory.
 * Output: (prefix, method, <prefix>.fasta), copied to
 *         results/<method>-haplotypes.
 */
process HAPLINK_SEQUENCES {
    cpus 1
    memory '6.GB'
    publishDir "results/${method}-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), val(method), path(yaml)
    path reference

    output:
    tuple val(prefix), val(method), path("*.fasta")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink sequences \\
        "${reference}" \\
        "${yaml}" \\
        --prefix "${prefix}" \\
        > "${prefix}.fasta"
    """
}