#!/usr/bin/env nextflow

workflow {

    // One [prefix, reads] tuple per gzipped FASTQ matched by the glob; the
    // prefix is the file's simple name.
    ch_input = Channel.fromPath("*.fastq.gz")
        .map { reads -> tuple(reads.simpleName, reads) }

    // Reference FASTA fetched by EFETCH; handed to every downstream process.
    EFETCH()
    ch_reference = EFETCH.out

    // Read filtering.
    NANOFILT( ch_input )
    ch_reads = NANOFILT.out

    // Alignment of the filtered reads against the reference.
    MINIMAP2( ch_reads, ch_reference )
    ch_alignments = MINIMAP2.out

    // Variant calling on the alignments.
    HAPLINK_VARIANTS( ch_alignments, ch_reference )
    ch_variants = HAPLINK_VARIANTS.out

    // Pair each sample's alignment tuple with its VCF, matching on the
    // leading prefix element.
    ch_haplotype_calling = ch_alignments.join( ch_variants )

    // Haplotype calling, once per method. Each result is tagged with the
    // method name so HAPLINK_SEQUENCES can tell the two sets apart.
    HAPLINK_RAW_HAPLOTYPES( ch_haplotype_calling, ch_reference )
    ch_raw_haplotypes = HAPLINK_RAW_HAPLOTYPES.out
        .map { prefix, yaml -> [ prefix, 'raw', yaml ] }

    HAPLINK_ML_HAPLOTYPES( ch_haplotype_calling, ch_reference )
    ch_ml_haplotypes = HAPLINK_ML_HAPLOTYPES.out
        .map { prefix, yaml -> [ prefix, 'ml', yaml ] }

    // Both haplotype sets go through the same sequence-export step.
    ch_all_haplotypes = ch_raw_haplotypes.mix( ch_ml_haplotypes )

    HAPLINK_SEQUENCES( ch_all_haplotypes, ch_reference )
}

/*
 * EFETCH: looks up accession NC_036618.1 in the `nucleotide` database with
 * Entrez Direct (`esearch | efetch`) and writes the FASTA record to
 * `idv4.fasta`.
 *
 * Takes no inputs. Emits the single file `idv4.fasta`, which is also copied
 * into `results/`.
 */
process EFETCH {
    cpus 1
    memory '256.MB'
    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'

    publishDir "results", mode: 'copy'

    output:
    path 'idv4.fasta'

    // NOTE(review): the accession is hard-coded in the command below; the
    // command presumably needs network access to NCBI at run time — confirm
    // for offline/HPC environments.
    script:
    """
    esearch \\
            -db nucleotide \\
            -query "NC_036618.1" \\
        | efetch \\
            -format fasta \\
        > idv4.fasta
    """
}

/*
 * NANOFILT: decompresses a gzipped FASTQ, pipes it through NanoFilt and
 * re-compresses the result.
 *
 * Input:  tuple(prefix, reads) - sample prefix and its gzipped FASTQ file.
 * Output: tuple(prefix, "<prefix>_trimmed.fastq.gz").
 *
 * NanoFilt is run with --length 100, --quality 7, --headcrop 30,
 * --tailcrop 30, --minGC 0.1 and --maxGC 0.9. Its log is written to
 * `trimmed/<prefix>.nanofilt.log` inside the task work directory; the log is
 * not declared as an output and is therefore not emitted or published.
 */
process NANOFILT {
    cpus 1
    memory '8.GB'
    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'

    input:
    tuple val(prefix), path(reads)

    output:
    tuple val(prefix), path("*_trimmed.fastq.gz")

    // The log directory must exist before NanoFilt starts: the task work
    // directory is created fresh for every task and nothing else creates
    // `trimmed/`, so opening the log file would otherwise fail.
    script:
    """
    mkdir -p trimmed
    gzip \\
            -cdf "${reads}" \\
        | NanoFilt \\
            --logfile "trimmed/${prefix}.nanofilt.log" \\
            --length 100 \\
            --quality 7 \\
            --headcrop 30 \\
            --tailcrop 30 \\
            --minGC 0.1 \\
            --maxGC 0.9 \\
        | gzip \\
        > "${prefix}_trimmed.fastq.gz"
    """
}

/*
 * MINIMAP2: aligns reads to the reference with minimap2 (`map-ont` preset,
 * SAM output with --MD and --eqx), sorts the alignments with samtools,
 * writes them as `<prefix>.bam` and indexes that file.
 *
 * Inputs:
 *   tuple(prefix, reads) - sample prefix and its reads file
 *   reference            - reference sequence file
 * Output: tuple(prefix, "<prefix>.bam", "<prefix>.bam.bai"), also copied to
 *         `results/`.
 */
process MINIMAP2 {
    cpus 4
    memory '8.GB'
    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'

    // Directives are kept together ahead of the input/output blocks, as the
    // Nextflow process syntax expects.
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(reads)
    path reference

    output:
    tuple val(prefix), path("*.bam"), path("*.bam.bai")

    script:
    """
    minimap2 \\
            -x map-ont \\
            --MD \\
            --eqx \\
            -t ${task.cpus} \\
            -a \\
            "${reference}" \\
            "${reads}" \\
    | samtools sort \\
    | samtools view \\
            -@ ${task.cpus} \\
            -b \\
            -h \\
            -o "${prefix}.bam"
    samtools index "${prefix}.bam"
    """
}

/*
 * SHORAH_AMPLICON: runs `shorah amplicon` on a BAM file against a reference.
 *
 * Inputs:
 *   tuple(prefix, bam) - sample prefix and its alignment file
 *   reference          - reference sequence file
 * Outputs (two channels), both copied to `results/shorah-amplicon/`:
 *   tuple(prefix, "*.vcf")
 *   tuple(prefix, "*support.fas")
 *
 * NOTE(review): this process is not called by the workflow block in this
 * file. Its input tuple has no BAM index element, so it cannot be fed
 * directly from MINIMAP2's three-element output without a `map` - confirm
 * intended usage before wiring it in.
 */
process SHORAH_AMPLICON {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'

    // Directives are kept together ahead of the input/output blocks, as the
    // Nextflow process syntax expects.
    publishDir "results/shorah-amplicon", mode: 'copy'

    input:
    tuple val(prefix), path(bam)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    // The final argument line carries no trailing backslash, so the command
    // ends cleanly instead of on a dangling line continuation.
    script:
    """
    shorah amplicon \\
        -t ${task.cpus} \\
        -f ${reference} \\
        -b ${bam}
    """
}

/*
 * HAPLINK_VARIANTS: runs `haplink variants` on a BAM file against the
 * reference and captures its standard output as `<prefix>.vcf`.
 *
 * Inputs:
 *   tuple(prefix, bam, bai) - sample prefix, alignment file and its index
 *                             (the index is staged but not named on the
 *                             command line)
 *   reference               - reference sequence file
 * Output: tuple(prefix, "<prefix>.vcf"), also copied to `results/`.
 *
 * NOTE(review): no `container` directive is set here, unlike the EFETCH,
 * NANOFILT and MINIMAP2 processes - presumably `haplink` is expected on the
 * host PATH or provided via configuration; confirm.
 */
process HAPLINK_VARIANTS {
    cpus 2
    memory '12.GB'

    // Directives are kept together ahead of the input/output blocks, as the
    // Nextflow process syntax expects.
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    path reference

    output:
    tuple val(prefix), path("*.vcf")

    // JULIA_NUM_THREADS is exported so the thread count follows the CPUs
    // allotted to the task.
    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink variants \\
            "${reference}" \\
            "${bam}" \\
        > "${prefix}.vcf"
    """
}

/*
 * HAPLINK_RAW_HAPLOTYPES: runs `haplink haplotypes` with `--frequency 0.01`
 * on a sample's reference, VCF and BAM, capturing standard output as
 * `<prefix>.yaml`.
 *
 * Inputs:
 *   tuple(prefix, bam, bai, vcf) - sample prefix, alignment file, its index
 *                                  (staged but not named on the command
 *                                  line) and the variant calls
 *   reference                    - reference sequence file
 * Output: tuple(prefix, "<prefix>.yaml"), also copied to
 *         `results/raw-haplotypes/`.
 */
process HAPLINK_RAW_HAPLOTYPES {
    cpus 2
    memory '12.GB'

    // Directives are kept together ahead of the input/output blocks, as the
    // Nextflow process syntax expects.
    publishDir "results/raw-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    // JULIA_NUM_THREADS is exported so the thread count follows the CPUs
    // allotted to the task.
    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
            "${reference}" \\
            "${vcf}" \\
            "${bam}" \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}

/*
 * HAPLINK_ML_HAPLOTYPES: runs `haplink haplotypes` with `--simulated-reads`,
 * `--overlap-min 20`, `--overlap-max 8000` and `--frequency 0.01` on a
 * sample's reference, VCF and BAM, capturing standard output as
 * `<prefix>.yaml`.
 *
 * Inputs:
 *   tuple(prefix, bam, bai, vcf) - sample prefix, alignment file, its index
 *                                  (staged but not named on the command
 *                                  line) and the variant calls
 *   reference                    - reference sequence file
 * Output: tuple(prefix, "<prefix>.yaml"), also copied to
 *         `results/ml-haplotypes/`.
 */
process HAPLINK_ML_HAPLOTYPES {
    cpus 8
    memory '12.GB'

    // Directives are kept together ahead of the input/output blocks, as the
    // Nextflow process syntax expects.
    publishDir "results/ml-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    // JULIA_NUM_THREADS is exported so the thread count follows the CPUs
    // allotted to the task.
    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
            "${reference}" \\
            "${vcf}" \\
            "${bam}" \\
        --simulated-reads \\
        --overlap-min 20 \\
        --overlap-max 8000 \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}

/*
 * HAPLINK_SEQUENCES: runs `haplink sequences` on a haplotype YAML file and
 * the reference, capturing standard output as `<prefix>.fasta`.
 *
 * Inputs:
 *   tuple(prefix, method, yaml) - sample prefix, haplotype-calling method
 *                                 tag and the haplotype file
 *   reference                   - reference sequence file
 * Output: tuple(prefix, method, "<prefix>.fasta"), also copied to
 *         `results/<method>-haplotypes/`.
 */
process HAPLINK_SEQUENCES {
    cpus 1
    memory '6.GB'

    // Directives are kept together ahead of the input/output blocks, as the
    // Nextflow process syntax expects. The path interpolates the `method`
    // input, so it is resolved per task rather than once for the process.
    publishDir "results/${method}-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), val(method), path(yaml)
    path reference

    output:
    tuple val(prefix), val(method), path("*.fasta")

    // JULIA_NUM_THREADS is exported so the thread count follows the CPUs
    // allotted to the task.
    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink sequences \\
            "${reference}" \\
            "${yaml}" \\
            --prefix "${prefix}" \\
        > "${prefix}.fasta"
    """
}