diff --git a/main.nf b/main.nf
index 58335df..af776f3 100755
--- a/main.nf
+++ b/main.nf
@@ -1,5 +1,17 @@
 #!/usr/bin/env nextflow
 
+include { CLIQUESNV } from './modules/cliquesnv'
+include { EFETCH } from './modules/efetch'
+include { HAPLINK_HAPLOTYPES as HAPLINK_ML_HAPLOTYPES } from './modules/haplink/haplotypes'
+include { HAPLINK_HAPLOTYPES as HAPLINK_RAW_HAPLOTYPES } from './modules/haplink/haplotypes'
+include { HAPLINK_SEQUENCES } from './modules/haplink/sequences'
+include { HAPLINK_VARIANTS } from './modules/haplink/variants'
+include { PREDICTHAPLO } from './modules/predicthaplo'
+include { MINIMAP2 } from './modules/minimap2'
+include { NANOFILT } from './modules/nanofilt'
+include { QUASIRECOMB } from './modules/quasirecomb'
+include { SHORAH_AMPLICON } from './modules/shorah/amplicon'
+include { SHORAH_SHOTGUN } from './modules/shorah/shotgun'
 include { VIQUAS } from './modules/viquas'
 
 workflow {
@@ -9,7 +21,7 @@ workflow {
         .map { file -> tuple(file.simpleName, file) }
         .set { ch_input }
 
-    EFETCH()
+    EFETCH('NC_036618.1')
     EFETCH
         .out
         .set { ch_reference }
@@ -24,6 +36,11 @@ workflow {
         .out
         .set { ch_alignments }
 
+    CLIQUESNV(
+        ch_alignments,
+        'snv-pacbio'
+    )
+
     HAPLINK_VARIANTS( ch_alignments, ch_reference )
     HAPLINK_VARIANTS
         .out
@@ -40,7 +57,7 @@ workflow {
     )
     HAPLINK_RAW_HAPLOTYPES
         .out
-        .map{ [ it[0], 'raw', it[1] ] }
+        .map{ [ it[0], 'HAPLINK_RAW_HAPLOTYPES', it[1] ] }
         .set{ ch_raw_haplotypes }
 
     HAPLINK_ML_HAPLOTYPES(
@@ -49,7 +66,7 @@ workflow {
     )
     HAPLINK_ML_HAPLOTYPES
         .out
-        .map{ [ it[0], 'ml', it[1] ] }
+        .map{ [ it[0], 'HAPLINK_ML_HAPLOTYPES', it[1] ] }
         .set{ ch_ml_haplotypes }
 
     ch_raw_haplotypes
@@ -61,236 +78,25 @@ workflow {
         ch_reference
     )
 
+    PREDICTHAPLO(
+        ch_alignments,
+        ch_reference
+    )
+
+    QUASIRECOMB( ch_alignments )
+
+    SHORAH_AMPLICON(
+        ch_alignments,
+        ch_reference
+    )
+
+    SHORAH_SHOTGUN(
+        ch_alignments,
+        ch_reference
+    )
+
     VIQUAS(
         ch_alignments,
         ch_reference
     )
 }
-
-process EFETCH {
-    cpus 1
-    memory '256.MB'
-    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'
-
-    publishDir "results", mode: 'copy'
-
-    output:
-    path 'idv4.fasta'
-
-    script:
-    """
-    esearch \\
-        -db nucleotide \\
-        -query "NC_036618.1" \\
-        | efetch \\
-        -format fasta \\
-        > idv4.fasta
-    """
-}
-
-process NANOFILT {
-    cpus 1
-    memory '8.GB'
-    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'
-
-    input:
-    tuple val(prefix), path(reads)
-
-    output:
-    tuple val(prefix), path("*_trimmed.fastq.gz")
-
-    script:
-    """
-    gzip \\
-        -cdf "${reads}" \\
-        | NanoFilt \\
-        --logfile "trimmed/${prefix}.nanofilt.log" \\
-        --length 100 \\
-        --quality 7 \\
-        --headcrop 30 \\
-        --tailcrop 30 \\
-        --minGC 0.1 \\
-        --maxGC 0.9 \\
-        | gzip \\
-        > "${prefix}_trimmed.fastq.gz"
-    """
-}
-
-process MINIMAP2 {
-    cpus 4
-    memory '8.GB'
-    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
-
-    input:
-    tuple val(prefix), path(reads)
-    path reference
-
-    publishDir "results", mode: 'copy'
-
-    output:
-    tuple val(prefix), path("*.bam"), path("*.bam.bai")
-    script:
-    """
-    minimap2 \\
-        -x map-ont \\
-        --MD \\
-        --eqx \\
-        -t ${task.cpus} \\
-        -a \\
-        "${reference}" \\
-        "${reads}" \\
-        | samtools sort \\
-        | samtools view \\
-        -@ ${task.cpus} \\
-        -b \\
-        -h \\
-        -o "${prefix}.bam"
-    samtools index "${prefix}.bam"
-    """
-}
-
-process SHORAH_AMPLICON {
-    label 'process_high'
-    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
-
-    input:
-    tuple val(prefix), path(bam)
-    path(reference)
-
-    output:
-    tuple val(prefix), path("*.vcf")
-    tuple val(prefix), path("*support.fas")
-
-    publishDir "results/shorah-amplicon", mode: 'copy'
-
-    script:
-    """
-    shorah amplicon \\
-        -t ${task.cpus} \\
-        -f ${reference} \\
-        -b ${bam} \\
-    """
-}
-
-process SHORAH_SHOTGUN {
-    label 'process_high'
-    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
-
-    input:
-    tuple val(prefix), path(bam)
-    path(reference)
-
-    output:
-    tuple val(prefix), path("*.vcf")
-    tuple val(prefix), path("*support.fas")
-
-    publishDir "results/shorah-shotgun", mode: 'copy'
-
-    script:
-    """
-    shorah shotgun \\
-        -t ${task.cpus} \\
-        -f ${reference} \\
-        -b ${bam} \\
-    """
-}
-
-process HAPLINK_VARIANTS {
-    cpus 2
-    memory '12.GB'
-
-    input:
-    tuple val(prefix), path(bam), path(bai)
-    path reference
-
-    output:
-    tuple val(prefix), path("*.vcf")
-
-    publishDir "results", mode: 'copy'
-
-    script:
-    """
-    export JULIA_NUM_THREADS=${task.cpus}
-    haplink variants \\
-        "${reference}" \\
-        "${bam}" \\
-        > "${prefix}.vcf"
-    """
-}
-
-process HAPLINK_RAW_HAPLOTYPES {
-    cpus 2
-    memory '12.GB'
-
-    input:
-    tuple val(prefix), path(bam), path(bai), path(vcf)
-    path reference
-
-    output:
-    tuple val(prefix), path("*.yaml")
-
-    publishDir "results/raw-haplotypes", mode: 'copy'
-
-    script:
-    """
-    export JULIA_NUM_THREADS=${task.cpus}
-    haplink haplotypes \\
-        "${reference}" \\
-        "${vcf}" \\
-        "${bam}" \\
-        --frequency 0.01 \\
-        > "${prefix}.yaml"
-    """
-}
-
-process HAPLINK_ML_HAPLOTYPES {
-    cpus 8
-    memory '12.GB'
-
-    input:
-    tuple val(prefix), path(bam), path(bai), path(vcf)
-    path reference
-
-    output:
-    tuple val(prefix), path("*.yaml")
-
-    publishDir "results/ml-haplotypes", mode: 'copy'
-
-    script:
-    """
-    export JULIA_NUM_THREADS=${task.cpus}
-    haplink haplotypes \\
-        "${reference}" \\
-        "${vcf}" \\
-        "${bam}" \\
-        --simulated-reads \\
-        --overlap-min 20 \\
-        --overlap-max 8000 \\
-        --frequency 0.01 \\
-        > "${prefix}.yaml"
-    """
-}
-
-process HAPLINK_SEQUENCES {
-    cpus 1
-    memory '6.GB'
-
-    input:
-    tuple val(prefix), val(method), path(yaml)
-    path reference
-
-    output:
-    tuple val(prefix), val(method), path("*.fasta")
-
-    publishDir "results/${method}-haplotypes", mode: 'copy'
-
-    script:
-    """
-    export JULIA_NUM_THREADS=${task.cpus}
-    haplink sequences \\
-        "${reference}" \\
-        "${yaml}" \\
-        --prefix "${prefix}" \\
-        > "${prefix}.fasta"
-    """
-}
diff --git a/modules/cliquesnv/main.nf b/modules/cliquesnv/main.nf
new file mode 100644
index 0000000..da197e7
--- /dev/null
+++ b/modules/cliquesnv/main.nf
@@ -0,0 +1,31 @@
+process CLIQUESNV {
+    tag "${prefix}"
+    label 'process_high'
+    label 'cliquesnv'
+
+    container 'quay.io/biocontainers/cliquesnv:2.0.3--hdfd78af_0'
+
+    input:
+    tuple val(prefix), path(bam), path(bai)
+    val(method)
+
+    output:
+    tuple val(prefix), path("*.json")
+    tuple val(prefix), path("*.fasta")
+
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def jmemstring = task.memory.toMega() + 'M'
+    """
+    cliquesnv \\
+        -Xmx${jmemstring} \\
+        -threads ${task.cpus} \\
+        -m '${method}' \\
+        -in "${bam}" \\
+        -outDir .
+    """
+}
diff --git a/modules/efetch/main.nf b/modules/efetch/main.nf
new file mode 100644
index 0000000..f3ce0b3
--- /dev/null
+++ b/modules/efetch/main.nf
@@ -0,0 +1,24 @@
+process EFETCH {
+    tag "${genome}"
+    label 'process_single'
+
+    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'
+
+    input:
+    val(genome)
+
+    publishDir "results", mode: 'copy'
+
+    output:
+    path 'reference.fasta'
+
+    script:
+    """
+    esearch \\
+        -db nucleotide \\
+        -query "${genome}" \\
+        | efetch \\
+        -format fasta \\
+        > reference.fasta
+    """
+}
diff --git a/modules/haplink/haplotypes/main.nf b/modules/haplink/haplotypes/main.nf
new file mode 100644
index 0000000..5673af4
--- /dev/null
+++ b/modules/haplink/haplotypes/main.nf
@@ -0,0 +1,32 @@
+process HAPLINK_HAPLOTYPES {
+    tag "${prefix}"
+    label 'process_high'
+    label 'haplink'
+
+    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
+
+    input:
+    tuple val(prefix), path(bam), path(bai), path(vcf)
+    path reference
+
+    output:
+    tuple val(prefix), path("*.yaml")
+
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def ml_args = task.ext.ml_args ?: ''
+    """
+    export JULIA_NUM_THREADS=${task.cpus}
+    haplink haplotypes \\
+        "${reference}" \\
+        "${vcf}" \\
+        "${bam}" \\
+        --frequency 0.01 \\
+        ${ml_args} \\
+        > "${prefix}.yaml"
+    """
+}
diff --git a/modules/haplink/sequences/main.nf b/modules/haplink/sequences/main.nf
new file mode 100644
index 0000000..fa1c76e
--- /dev/null
+++ b/modules/haplink/sequences/main.nf
@@ -0,0 +1,29 @@
+process HAPLINK_SEQUENCES {
+    tag "${method}: ${prefix}"
+    label 'process_single'
+    label 'haplink'
+
+    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
+
+    input:
+    tuple val(prefix), val(method), path(yaml)
+    path reference
+
+    output:
+    tuple val(prefix), val(method), path("*.fasta")
+
+    publishDir "results/${method}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    export JULIA_NUM_THREADS=${task.cpus}
+    haplink sequences \\
+        "${reference}" \\
+        "${yaml}" \\
+        --prefix "${prefix}" \\
+        > "${prefix}.fasta"
+    """
+}
diff --git a/modules/haplink/variants/main.nf b/modules/haplink/variants/main.nf
new file mode 100644
index 0000000..ec6e9c8
--- /dev/null
+++ b/modules/haplink/variants/main.nf
@@ -0,0 +1,28 @@
+process HAPLINK_VARIANTS {
+    tag "${prefix}"
+    label 'process_medium'
+    label 'haplink'
+
+    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
+
+    input:
+    tuple val(prefix), path(bam), path(bai)
+    path reference
+
+    output:
+    tuple val(prefix), path("*.vcf")
+
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    export JULIA_NUM_THREADS=${task.cpus}
+    haplink variants \\
+        "${reference}" \\
+        "${bam}" \\
+        > "${prefix}.vcf"
+    """
+}
diff --git a/modules/minimap2/main.nf b/modules/minimap2/main.nf
new file mode 100644
index 0000000..7d3c2aa
--- /dev/null
+++ b/modules/minimap2/main.nf
@@ -0,0 +1,33 @@
+process MINIMAP2 {
+    tag "${prefix}"
+    label 'process_medium'
+
+    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
+
+    input:
+    tuple val(prefix), path(reads)
+    path reference
+
+    publishDir "results", mode: 'copy'
+
+    output:
+    tuple val(prefix), path("*.bam"), path("*.bam.bai")
+    script:
+    """
+    minimap2 \\
+        -x map-ont \\
+        --MD \\
+        --eqx \\
+        -t ${task.cpus} \\
+        -a \\
+        "${reference}" \\
+        "${reads}" \\
+        | samtools sort \\
+        | samtools view \\
+        -@ ${task.cpus} \\
+        -b \\
+        -h \\
+        -o "${prefix}.bam"
+    samtools index "${prefix}.bam"
+    """
+}
diff --git a/modules/nanofilt/main.nf b/modules/nanofilt/main.nf
new file mode 100644
index 0000000..c6ec93f
--- /dev/null
+++ b/modules/nanofilt/main.nf
@@ -0,0 +1,30 @@
+process NANOFILT {
+    tag "${prefix}"
+    label 'process_low'
+
+    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'
+
+    input:
+    tuple val(prefix), path(reads)
+
+    output:
+    tuple val(prefix), path("*_trimmed.fastq.gz")
+
+    script:
+    // The log is written into the task work directory: this task never
+    // creates a `trimmed/` subdirectory, so logging there would presumably fail.
+    """
+    gzip \\
+        -cdf "${reads}" \\
+        | NanoFilt \\
+        --logfile "${prefix}.nanofilt.log" \\
+        --length 100 \\
+        --quality 7 \\
+        --headcrop 30 \\
+        --tailcrop 30 \\
+        --minGC 0.1 \\
+        --maxGC 0.9 \\
+        | gzip \\
+        > "${prefix}_trimmed.fastq.gz"
+    """
+}
diff --git a/modules/predicthaplo/main.nf b/modules/predicthaplo/main.nf
new file mode 100644
index 0000000..f403c0e
--- /dev/null
+++ b/modules/predicthaplo/main.nf
@@ -0,0 +1,27 @@
+process PREDICTHAPLO {
+    tag "${prefix}"
+    label 'process_high'
+    label 'predicthaplo'
+
+    container 'quay.io/biocontainers/predicthaplo:2.1.4--h9b88814_5'
+
+    input:
+    tuple val(prefix), path(bam), path(bai)
+    path(reference)
+
+    output:
+    tuple val(prefix), path("*.fa*")
+
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    predicthaplo \\
+        --sam "${bam}" \\
+        --reference "${reference}" \\
+        --prefix "${prefix}"
+    """
+}
diff --git a/modules/quasirecomb/main.nf b/modules/quasirecomb/main.nf
new file mode 100644
index 0000000..8e054cf
--- /dev/null
+++ b/modules/quasirecomb/main.nf
@@ -0,0 +1,31 @@
+process QUASIRECOMB {
+    tag "${prefix}"
+    label 'process_high'
+    label 'quasirecomb'
+
+    container 'quay.io/biocontainers/quasirecomb:1.2--hdfd78af_1'
+
+    input:
+    tuple val(prefix), path(bam), path(bai)
+
+    output:
+    tuple val(prefix), path("*.fasta")
+
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def jmemstring = task.memory.toMega() + 'M'
+    """
+    quasirecomb \\
+        -XX:+UseParallelGC \\
+        -Xms2g \\
+        -Xmx${jmemstring} \\
+        -XX:+UseNUMA \\
+        -XX:NewRatio=9 \\
+        -i "${bam}"
+    mv quasispecies.fasta "${prefix}.fasta"
+    """
+}
diff --git a/modules/shorah/amplicon/main.nf b/modules/shorah/amplicon/main.nf
new file mode 100644
index 0000000..c8ccf38
--- /dev/null
+++ b/modules/shorah/amplicon/main.nf
@@ -0,0 +1,28 @@
+process SHORAH_AMPLICON {
+    tag "${prefix}"
+    label 'process_high'
+    label 'shorah'
+
+    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
+
+    input:
+    tuple val(prefix), path(bam), path(bai)
+    path(reference)
+
+    output:
+    tuple val(prefix), path("*.vcf")
+    tuple val(prefix), path("*support.fas")
+
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    shorah amplicon \\
+        -t ${task.cpus} \\
+        -f ${reference} \\
+        -b ${bam} \\
+    """
+}
diff --git a/modules/shorah/shotgun/main.nf b/modules/shorah/shotgun/main.nf
new file mode 100644
index 0000000..2b7aa86
--- /dev/null
+++ b/modules/shorah/shotgun/main.nf
@@ -0,0 +1,28 @@
+process SHORAH_SHOTGUN {
+    tag "${prefix}"
+    label 'process_high'
+    label 'shorah'
+
+    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
+
+    input:
+    tuple val(prefix), path(bam), path(bai)
+    path(reference)
+
+    output:
+    tuple val(prefix), path("*.vcf")
+    tuple val(prefix), path("*support.fas")
+
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    shorah shotgun \\
+        -t ${task.cpus} \\
+        -f ${reference} \\
+        -b ${bam} \\
+    """
+}
diff --git a/modules/viquas/main.nf b/modules/viquas/main.nf
index 16bb7b5..ad19953 100644
--- a/modules/viquas/main.nf
+++ b/modules/viquas/main.nf
@@ -1,6 +1,7 @@
 process VIQUAS {
     tag "${prefix}"
     label 'process_high'
+    label 'viquas'
 
     container 'code.millironx.com/millironx/haplotyper-battle-royale:viquas'
 
@@ -11,7 +12,10 @@ process VIQUAS {
     output:
     tuple val(prefix), path("*.fa")
 
-    publishDir "results/viquas", mode: 'copy'
+    publishDir "results/${task.process}", mode: 'copy'
+
+    when:
+    task.ext.when == null || task.ext.when
 
     script:
     """
diff --git a/nextflow.config b/nextflow.config
index 84597cb..98bf7b1 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,11 +1,189 @@
-process {
-    errorStrategy = 'finish'
-    time = '7d'
+params {
+    reference = null
+
+    // Config options
+    config_profile_name = null
+    config_profile_description = null
+    custom_config_version = 'master'
+    custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
+    config_profile_contact = null
+    config_profile_url = null
+
+    // Max resource options
+    // Defaults only, expecting to be overwritten
+    max_memory = '128.GB'
+    max_cpus = 16
+    max_time = '240.h'
+
+    // Haplotyper options
+    // By default, run all haplotypers
+    cliquesnv = true
+    haplink = true
+    predicthaplo = true
+    quasirecomb = true
+    shorah = true
+    viquas = true
 }
 
-singularity.enabled = true
+process {
+    cpus = { check_max(1 * task.attempt, 'cpus') }
+    memory = { check_max(6.GB * task.attempt, 'memory') }
+    time = { check_max(4.h * task.attempt, 'time') }
+
+    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
+    maxRetries = 1
+    maxErrors = '-1'
+
+    withLabel:process_single {
+        cpus = { check_max(1, 'cpus') }
+        memory = { check_max( 6.GB * task.attempt, 'memory') }
+        time = { check_max( 4.h * task.attempt, 'time') }
+    }
+    withLabel:process_low {
+        cpus = { check_max( 2 * task.attempt, 'cpus') }
+        memory = { check_max( 12.GB * task.attempt, 'memory') }
+        time = { check_max( 4.h * task.attempt, 'time') }
+    }
+    withLabel:process_medium {
+        cpus = { check_max( 6 * task.attempt, 'cpus') }
+        memory = { check_max( 36.GB * task.attempt, 'memory') }
+        time = { check_max( 8.h * task.attempt, 'time') }
+    }
+    withLabel:process_high {
+        cpus = { check_max( 12 * task.attempt, 'cpus') }
+        memory = { check_max( 72.GB * task.attempt, 'memory') }
+        time = { check_max( 16.h * task.attempt, 'time') }
+    }
+
+    withName: 'HAPLINK_ML_HAPLOTYPES' {
+        // Keep the extra arguments on a single line: HAPLINK_HAPLOTYPES
+        // interpolates them in the middle of a backslash-continued command,
+        // so embedded newlines would cut that command short.
+        ext.ml_args = [
+            '--simulated-reads',
+            '--overlap-min 20',
+            '--overlap-max 8000'
+        ].join(' ')
+    }
+
+    withLabel: 'cliquesnv' {
+        ext.when = params.cliquesnv
+    }
+    withLabel: 'haplink' {
+        ext.when = params.haplink
+    }
+    withLabel: 'predicthaplo' {
+        ext.when = params.predicthaplo
+    }
+    withLabel: 'quasirecomb' {
+        ext.when = params.quasirecomb
+    }
+    withLabel: 'shorah' {
+        ext.when = params.shorah
+    }
+    withLabel: 'viquas' {
+        ext.when = params.viquas
+    }
+}
+
+try {
+    includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
+}
+
+profiles {
+    docker {
+        docker.enabled = true
+        docker.userEmulation = true
+        conda.enabled = false
+        singularity.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+        apptainer.enabled = false
+    }
+    singularity {
+        singularity.enabled = true
+        singularity.autoMounts = true
+        conda.enabled = false
+        docker.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+        apptainer.enabled = false
+    }
+    podman {
+        podman.enabled = true
+        conda.enabled = false
+        docker.enabled = false
+        singularity.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+        apptainer.enabled = false
+    }
+    shifter {
+        shifter.enabled = true
+        conda.enabled = false
+        docker.enabled = false
+        singularity.enabled = false
+        podman.enabled = false
+        charliecloud.enabled = false
+        apptainer.enabled = false
+    }
+    charliecloud {
+        charliecloud.enabled = true
+        conda.enabled = false
+        docker.enabled = false
+        singularity.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        apptainer.enabled = false
+    }
+    apptainer {
+        apptainer.enabled = true
+        apptainer.autoMounts = true
+        conda.enabled = false
+        docker.enabled = false
+        singularity.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+    }
+}
 
 env {
     R_PROFILE_USER = "/.Rprofile"
     R_ENVIRON_USER = "/.Renviron"
 }
+
+def check_max(obj, type) {
+    if (type == 'memory') {
+        try {
+            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
+                return params.max_memory as nextflow.util.MemoryUnit
+            else
+                return obj
+        } catch (all) {
+            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'time') {
+        try {
+            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
+                return params.max_time as nextflow.util.Duration
+            else
+                return obj
+        } catch (all) {
+            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'cpus') {
+        try {
+            return Math.min( obj, params.max_cpus as int )
+        } catch (all) {
+            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+            return obj
+        }
+    }
+}