#!/usr/bin/env nextflow

/*
 * Read filtering, alignment, variant calling and haplotype calling pipeline.
 *
 * Stages, in the order the workflow wires them:
 *   EFETCH                  -> downloads the reference FASTA (idv4.fasta)
 *   NANOFILT                -> filters/trims each input FASTQ
 *   MINIMAP2                -> aligns filtered reads to the reference, sorts and indexes
 *   HAPLINK_VARIANTS        -> calls variants per sample
 *   HAPLINK_RAW_HAPLOTYPES  -> calls haplotypes (tagged 'raw')
 *   HAPLINK_ML_HAPLOTYPES   -> calls haplotypes with --simulated-reads (tagged 'ml')
 *   HAPLINK_SEQUENCES       -> writes haplotype sequences for both methods
 *
 * SHORAH_AMPLICON is defined but is not invoked by the workflow below.
 */

workflow {

    // One [prefix, file] tuple per file matching the relative glob `*.fastq.gz`.
    // `simpleName` is used as the sample prefix throughout the pipeline.
    // The closure parameter is not called `file` so that it does not shadow
    // Nextflow's built-in `file()` function.
    Channel
        .fromPath("*.fastq.gz")
        .map { fastq -> tuple(fastq.simpleName, fastq) }
        .set { ch_input }

    // Reference genome, shared by every downstream process
    EFETCH()
    EFETCH
        .out
        .set { ch_reference }

    // Read filtering
    NANOFILT( ch_input )
    NANOFILT
        .out
        .set { ch_reads }

    // Alignment: emits [prefix, bam, bai]
    MINIMAP2( ch_reads, ch_reference )
    MINIMAP2
        .out
        .set { ch_alignments }

    // Variant calling: emits [prefix, vcf]
    HAPLINK_VARIANTS( ch_alignments, ch_reference )
    HAPLINK_VARIANTS
        .out
        .set { ch_variants }

    // Join alignments and variants on the prefix: [prefix, bam, bai, vcf]
    ch_alignments
        .join( ch_variants )
        .set { ch_haplotype_calling }

    // Haplotype calling, method 'raw'
    HAPLINK_RAW_HAPLOTYPES( ch_haplotype_calling, ch_reference )
    HAPLINK_RAW_HAPLOTYPES
        .out
        .map { [ it[0], 'raw', it[1] ] }
        .set { ch_raw_haplotypes }

    // Haplotype calling, method 'ml'
    HAPLINK_ML_HAPLOTYPES( ch_haplotype_calling, ch_reference )
    HAPLINK_ML_HAPLOTYPES
        .out
        .map { [ it[0], 'ml', it[1] ] }
        .set { ch_ml_haplotypes }

    // Both methods go through the same sequence-export process; the method
    // tag in position 1 selects the publish directory there.
    ch_raw_haplotypes
        .mix( ch_ml_haplotypes )
        .set { ch_all_haplotypes }

    HAPLINK_SEQUENCES( ch_all_haplotypes, ch_reference )
}

/*
 * EFETCH: query the `nucleotide` database for accession NC_036618.1 with
 * esearch/efetch and save the FASTA as `idv4.fasta`.
 *
 * Output: path to `idv4.fasta` (also copied to `results/`).
 */
process EFETCH {
    cpus 1
    memory 256.MB
    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'
    publishDir "results", mode: 'copy'

    output:
    path 'idv4.fasta'

    script:
    """
    esearch \\
        -db nucleotide \\
        -query "NC_036618.1" \\
        | efetch \\
            -format fasta \\
        > idv4.fasta
    """
}

/*
 * NANOFILT: decompress a FASTQ, pass it through NanoFilt with the options
 * listed in the script, and recompress the result.
 *
 * Input:  [prefix, reads]
 * Output: [prefix, <prefix>_trimmed.fastq.gz]
 *
 * The NanoFilt log is written to `trimmed/<prefix>.nanofilt.log` inside the
 * task work directory. That directory is created first because nothing else
 * in the task creates it and the log file could not be opened otherwise.
 * The log is not declared as an output, so it is not passed downstream.
 */
process NANOFILT {
    cpus 1
    memory 8.GB
    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'

    input:
    tuple val(prefix), path(reads)

    output:
    tuple val(prefix), path("*_trimmed.fastq.gz")

    script:
    """
    mkdir -p trimmed

    gzip \\
        -cdf "${reads}" \\
        | NanoFilt \\
            --logfile "trimmed/${prefix}.nanofilt.log" \\
            --length 100 \\
            --quality 7 \\
            --headcrop 30 \\
            --tailcrop 30 \\
            --minGC 0.1 \\
            --maxGC 0.9 \\
        | gzip \\
        > "${prefix}_trimmed.fastq.gz"
    """
}

/*
 * MINIMAP2: align reads to the reference with the `map-ont` preset, sort the
 * alignments, write them as `<prefix>.bam` and index that file.
 *
 * Input:  [prefix, reads], reference
 * Output: [prefix, bam, bai] (also copied to `results/`)
 */
process MINIMAP2 {
    cpus 4
    memory 8.GB
    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(reads)
    path reference

    output:
    tuple val(prefix), path("*.bam"), path("*.bam.bai")

    script:
    """
    minimap2 \\
        -x map-ont \\
        --MD \\
        --eqx \\
        -t ${task.cpus} \\
        -a \\
        "${reference}" \\
        "${reads}" \\
        | samtools sort \\
        | samtools view \\
            -@ ${task.cpus} \\
            -b \\
            -h \\
            -o "${prefix}.bam"

    samtools index "${prefix}.bam"
    """
}

/*
 * SHORAH_AMPLICON: run `shorah amplicon` on a BAM against the reference.
 *
 * Input:  [prefix, bam], reference
 * Output: [prefix, *.vcf] and [prefix, *support.fas]
 *         (also copied to `results/shorah-amplicon/`)
 *
 * NOTE(review): this process is not called by the workflow in this file, and
 * its input tuple has two elements whereas MINIMAP2 emits three
 * ([prefix, bam, bai]) -- confirm the intended wiring before enabling it.
 */
process SHORAH_AMPLICON {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    publishDir "results/shorah-amplicon", mode: 'copy'

    input:
    tuple val(prefix), path(bam)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    """
    shorah amplicon \\
        -t ${task.cpus} \\
        -f ${reference} \\
        -b ${bam}
    """
}

/*
 * HAPLINK_VARIANTS: run `haplink variants` and capture stdout as
 * `<prefix>.vcf`.
 *
 * Input:  [prefix, bam, bai], reference
 *         (`bai` is staged with the BAM but not referenced in the command)
 * Output: [prefix, vcf] (also copied to `results/`)
 *
 * NOTE(review): unlike the processes above, no `container` is declared here;
 * presumably `haplink` is provided by the execution environment -- confirm.
 */
process HAPLINK_VARIANTS {
    cpus 2
    memory 12.GB
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    path reference

    output:
    tuple val(prefix), path("*.vcf")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink variants \\
        "${reference}" \\
        "${bam}" \\
        > "${prefix}.vcf"
    """
}

/*
 * HAPLINK_RAW_HAPLOTYPES: run `haplink haplotypes` with `--frequency 0.01`
 * and capture stdout as `<prefix>.yaml`.
 *
 * Input:  [prefix, bam, bai, vcf], reference
 * Output: [prefix, yaml] (also copied to `results/raw-haplotypes/`)
 *
 * NOTE(review): no `container` is declared here -- confirm how `haplink` is
 * made available.
 */
process HAPLINK_RAW_HAPLOTYPES {
    cpus 2
    memory 12.GB
    publishDir "results/raw-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}

/*
 * HAPLINK_ML_HAPLOTYPES: same command as HAPLINK_RAW_HAPLOTYPES with the
 * additional `--simulated-reads`, `--overlap-min 20` and `--overlap-max 8000`
 * options; stdout is captured as `<prefix>.yaml`.
 *
 * Input:  [prefix, bam, bai, vcf], reference
 * Output: [prefix, yaml] (also copied to `results/ml-haplotypes/`)
 *
 * NOTE(review): no `container` is declared here -- confirm how `haplink` is
 * made available.
 */
process HAPLINK_ML_HAPLOTYPES {
    cpus 8
    memory 12.GB
    publishDir "results/ml-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --simulated-reads \\
        --overlap-min 20 \\
        --overlap-max 8000 \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}

/*
 * HAPLINK_SEQUENCES: run `haplink sequences` on a haplotype YAML and capture
 * stdout as `<prefix>.fasta`.
 *
 * Input:  [prefix, method, yaml], reference
 * Output: [prefix, method, fasta]
 *         (also copied to `results/<method>-haplotypes/`, where `method` is
 *         the tag added by the workflow: 'raw' or 'ml')
 *
 * NOTE(review): no `container` is declared here -- confirm how `haplink` is
 * made available.
 */
process HAPLINK_SEQUENCES {
    cpus 1
    memory 6.GB
    publishDir "results/${method}-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), val(method), path(yaml)
    path reference

    output:
    tuple val(prefix), val(method), path("*.fasta")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink sequences \\
        "${reference}" \\
        "${yaml}" \\
        --prefix "${prefix}" \\
        > "${prefix}.fasta"
    """
}