Inline pipeline processes into main.nf and strip the nf-core style configuration.

Summary of what this patch records:
  * main.nf: drops every module include except VIQUAS and defines EFETCH,
    NANOFILT, MINIMAP2, SHORAH_AMPLICON, SHORAH_SHOTGUN, HAPLINK_VARIANTS,
    HAPLINK_RAW_HAPLOTYPES, HAPLINK_ML_HAPLOTYPES and HAPLINK_SEQUENCES inline.
    The aliased HAPLINK_HAPLOTYPES module becomes two concrete processes, with
    the former ext.ml_args flags written directly into the ML variant.
  * main.nf: removes the CLIQUESNV, PREDICTHAPLO, QUASIRECOMB, SHORAH_AMPLICON
    and SHORAH_SHOTGUN workflow calls; EFETCH no longer takes an accession
    (NC_036618.1 is hard-coded, output renamed to idv4.fasta); the haplotype
    method tags become 'raw' and 'ml'.
  * modules/: deletes every module file except viquas; VIQUAS loses its
    'viquas' label and its task.ext.when guard and publishes to results/viquas.
  * nextflow.config: removes params, label-based resources, the
    HAPLINK_ML_HAPLOTYPES ext.ml_args selector, the ext.when selectors,
    profiles and check_max(); keeps a fixed errorStrategy/time and enables
    Singularity.

NOTE(review): the inlined HAPLINK_* processes carry no container directive,
  while the deleted modules used quay.io/biocontainers/haplink:1.0.0--h031d066_0
  - confirm this is intentional.
NOTE(review): the inlined SHORAH_* input tuples omit path(bai), which the
  deleted modules declared and which MINIMAP2 still emits - confirm before
  these processes are called again.
NOTE(review): NANOFILT passes --logfile "trimmed/..." but the script never
  creates a trimmed/ directory - verify the log path.
NOTE(review): SHORAH_* and VIQUAS keep label 'process_high' although this
  patch deletes the withLabel:process_high resource block - confirm resources
  are configured elsewhere.

Indentation inside the hunks is normalised to four spaces; if the target tree
is indented differently, apply with `git apply --ignore-whitespace`.

diff --git a/main.nf b/main.nf
index af776f3..58335df 100755
--- a/main.nf
+++ b/main.nf
@@ -1,17 +1,5 @@
 #!/usr/bin/env nextflow
 
-include { CLIQUESNV } from './modules/cliquesnv'
-include { EFETCH } from './modules/efetch'
-include { HAPLINK_HAPLOTYPES as HAPLINK_ML_HAPLOTYPES } from './modules/haplink/haplotypes'
-include { HAPLINK_HAPLOTYPES as HAPLINK_RAW_HAPLOTYPES } from './modules/haplink/haplotypes'
-include { HAPLINK_SEQUENCES } from './modules/haplink/sequences'
-include { HAPLINK_VARIANTS } from './modules/haplink/variants'
-include { PREDICTHAPLO } from './modules/predicthaplo'
-include { MINIMAP2 } from './modules/minimap2'
-include { NANOFILT } from './modules/nanofilt'
-include { QUASIRECOMB } from './modules/quasirecomb'
-include { SHORAH_AMPLICON } from './modules/shorah/amplicon'
-include { SHORAH_SHOTGUN } from './modules/shorah/shotgun'
 include { VIQUAS } from './modules/viquas'
 
 workflow {
@@ -21,7 +9,7 @@ workflow {
         .map { file -> tuple(file.simpleName, file) }
         .set { ch_input }
 
-    EFETCH('NC_036618.1')
+    EFETCH()
     EFETCH
         .out
         .set { ch_reference }
@@ -36,11 +24,6 @@ workflow {
         .out
         .set { ch_alignments }
 
-    CLIQUESNV(
-        ch_alignments,
-        'snv-pacbio'
-    )
-
     HAPLINK_VARIANTS( ch_alignments, ch_reference )
     HAPLINK_VARIANTS
         .out
@@ -57,7 +40,7 @@ workflow {
     )
     HAPLINK_RAW_HAPLOTYPES
         .out
-        .map{ [ it[0], 'HAPLINK_RAW_HAPLOTYPES', it[1] ] }
+        .map{ [ it[0], 'raw', it[1] ] }
         .set{ ch_raw_haplotypes }
 
     HAPLINK_ML_HAPLOTYPES(
@@ -66,7 +49,7 @@ workflow {
     )
     HAPLINK_ML_HAPLOTYPES
         .out
-        .map{ [ it[0], 'HAPLINK_ML_HAPLOTYPES', it[1] ] }
+        .map{ [ it[0], 'ml', it[1] ] }
         .set{ ch_ml_haplotypes }
 
     ch_raw_haplotypes
@@ -78,25 +61,236 @@ workflow {
         ch_reference
     )
 
-    PREDICTHAPLO(
-        ch_alignments,
-        ch_reference
-    )
-
-    QUASIRECOMB( ch_alignments )
-
-    SHORAH_AMPLICON(
-        ch_alignments,
-        ch_reference
-    )
-
-    SHORAH_SHOTGUN(
-        ch_alignments,
-        ch_reference
-    )
-
     VIQUAS(
         ch_alignments,
         ch_reference
     )
 }
+
+process EFETCH {
+    cpus 1
+    memory '256.MB'
+    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'
+
+    publishDir "results", mode: 'copy'
+
+    output:
+    path 'idv4.fasta'
+
+    script:
+    """
+    esearch \\
+        -db nucleotide \\
+        -query "NC_036618.1" \\
+        | efetch \\
+            -format fasta \\
+        > idv4.fasta
+    """
+}
+
+process NANOFILT {
+    cpus 1
+    memory '8.GB'
+    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'
+
+    input:
+    tuple val(prefix), path(reads)
+
+    output:
+    tuple val(prefix), path("*_trimmed.fastq.gz")
+
+    script:
+    """
+    gzip \\
+        -cdf "${reads}" \\
+        | NanoFilt \\
+            --logfile "trimmed/${prefix}.nanofilt.log" \\
+            --length 100 \\
+            --quality 7 \\
+            --headcrop 30 \\
+            --tailcrop 30 \\
+            --minGC 0.1 \\
+            --maxGC 0.9 \\
+        | gzip \\
+        > "${prefix}_trimmed.fastq.gz"
+    """
+}
+
+process MINIMAP2 {
+    cpus 4
+    memory '8.GB'
+    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
+
+    input:
+    tuple val(prefix), path(reads)
+    path reference
+
+    publishDir "results", mode: 'copy'
+
+    output:
+    tuple val(prefix), path("*.bam"), path("*.bam.bai")
+    script:
+    """
+    minimap2 \\
+        -x map-ont \\
+        --MD \\
+        --eqx \\
+        -t ${task.cpus} \\
+        -a \\
+        "${reference}" \\
+        "${reads}" \\
+        | samtools sort \\
+        | samtools view \\
+            -@ ${task.cpus} \\
+            -b \\
+            -h \\
+            -o "${prefix}.bam"
+    samtools index "${prefix}.bam"
+    """
+}
+
+process SHORAH_AMPLICON {
+    label 'process_high'
+    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
+
+    input:
+    tuple val(prefix), path(bam)
+    path(reference)
+
+    output:
+    tuple val(prefix), path("*.vcf")
+    tuple val(prefix), path("*support.fas")
+
+    publishDir "results/shorah-amplicon", mode: 'copy'
+
+    script:
+    """
+    shorah amplicon \\
+        -t ${task.cpus} \\
+        -f ${reference} \\
+        -b ${bam} \\
+    """
+}
+
+process SHORAH_SHOTGUN {
+    label 'process_high'
+    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
+
+    input:
+    tuple val(prefix), path(bam)
+    path(reference)
+
+    output:
+    tuple val(prefix), path("*.vcf")
+    tuple val(prefix), path("*support.fas")
+
+    publishDir "results/shorah-shotgun", mode: 'copy'
+
+    script:
+    """
+    shorah shotgun \\
+        -t ${task.cpus} \\
+        -f ${reference} \\
+        -b ${bam} \\
+    """
+}
+
+process HAPLINK_VARIANTS {
+    cpus 2
+    memory '12.GB'
+
+    input:
+    tuple val(prefix), path(bam), path(bai)
+    path reference
+
+    output:
+    tuple val(prefix), path("*.vcf")
+
+    publishDir "results", mode: 'copy'
+
+    script:
+    """
+    export JULIA_NUM_THREADS=${task.cpus}
+    haplink variants \\
+        "${reference}" \\
+        "${bam}" \\
+        > "${prefix}.vcf"
+    """
+}
+
+process HAPLINK_RAW_HAPLOTYPES {
+    cpus 2
+    memory '12.GB'
+
+    input:
+    tuple val(prefix), path(bam), path(bai), path(vcf)
+    path reference
+
+    output:
+    tuple val(prefix), path("*.yaml")
+
+    publishDir "results/raw-haplotypes", mode: 'copy'
+
+    script:
+    """
+    export JULIA_NUM_THREADS=${task.cpus}
+    haplink haplotypes \\
+        "${reference}" \\
+        "${vcf}" \\
+        "${bam}" \\
+        --frequency 0.01 \\
+        > "${prefix}.yaml"
+    """
+}
+
+process HAPLINK_ML_HAPLOTYPES {
+    cpus 8
+    memory '12.GB'
+
+    input:
+    tuple val(prefix), path(bam), path(bai), path(vcf)
+    path reference
+
+    output:
+    tuple val(prefix), path("*.yaml")
+
+    publishDir "results/ml-haplotypes", mode: 'copy'
+
+    script:
+    """
+    export JULIA_NUM_THREADS=${task.cpus}
+    haplink haplotypes \\
+        "${reference}" \\
+        "${vcf}" \\
+        "${bam}" \\
+        --simulated-reads \\
+        --overlap-min 20 \\
+        --overlap-max 8000 \\
+        --frequency 0.01 \\
+        > "${prefix}.yaml"
+    """
+}
+
+process HAPLINK_SEQUENCES {
+    cpus 1
+    memory '6.GB'
+
+    input:
+    tuple val(prefix), val(method), path(yaml)
+    path reference
+
+    output:
+    tuple val(prefix), val(method), path("*.fasta")
+
+    publishDir "results/${method}-haplotypes", mode: 'copy'
+
+    script:
+    """
+    export JULIA_NUM_THREADS=${task.cpus}
+    haplink sequences \\
+        "${reference}" \\
+        "${yaml}" \\
+        --prefix "${prefix}" \\
+        > "${prefix}.fasta"
+    """
+}
diff --git a/modules/cliquesnv/main.nf b/modules/cliquesnv/main.nf
deleted file mode 100644
index da197e7..0000000
--- a/modules/cliquesnv/main.nf
+++ /dev/null
@@ -1,31 +0,0 @@
-process CLIQUESNV {
-    tag "${prefix}"
-    label 'process_high'
-    label 'cliquesnv'
-
-    container 'quay.io/biocontainers/cliquesnv:2.0.3--hdfd78af_0'
-
-    input:
-    tuple val(prefix), path(bam), path(bai)
-    val(method)
-
-    output:
-    tuple val(prefix), path("*.json")
-    tuple val(prefix), path("*.fasta")
-
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def jmemstring = task.memory.toMega() + 'M'
-    """
-    cliquesnv \\
-        -Xmx${jmemstring} \\
-        -threads ${task.cpus} \\
-        -m '${method}' \\
-        -in "${bam}" \\
-        -outDir .
-    """
-}
diff --git a/modules/efetch/main.nf b/modules/efetch/main.nf
deleted file mode 100644
index f3ce0b3..0000000
--- a/modules/efetch/main.nf
+++ /dev/null
@@ -1,24 +0,0 @@
-process EFETCH {
-    tag "${genome}"
-    label 'process_single'
-
-    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'
-
-    input:
-    val(genome)
-
-    publishDir "results", mode: 'copy'
-
-    output:
-    path 'reference.fasta'
-
-    script:
-    """
-    esearch \\
-        -db nucleotide \\
-        -query "${genome}" \\
-        | efetch \\
-            -format fasta \\
-        > reference.fasta
-    """
-}
diff --git a/modules/haplink/haplotypes/main.nf b/modules/haplink/haplotypes/main.nf
deleted file mode 100644
index 5673af4..0000000
--- a/modules/haplink/haplotypes/main.nf
+++ /dev/null
@@ -1,32 +0,0 @@
-process HAPLINK_HAPLOTYPES {
-    tag "${prefix}"
-    label 'process_high'
-    label 'haplink'
-
-    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
-
-    input:
-    tuple val(prefix), path(bam), path(bai), path(vcf)
-    path reference
-
-    output:
-    tuple val(prefix), path("*.yaml")
-
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def ml_args = task.ext.ml_args ?: ''
-    """
-    export JULIA_NUM_THREADS=${task.cpus}
-    haplink haplotypes \\
-        "${reference}" \\
-        "${vcf}" \\
-        "${bam}" \\
-        --frequency 0.01 \\
-        ${ml_args} \\
-        > "${prefix}.yaml"
-    """
-}
diff --git a/modules/haplink/sequences/main.nf b/modules/haplink/sequences/main.nf
deleted file mode 100644
index fa1c76e..0000000
--- a/modules/haplink/sequences/main.nf
+++ /dev/null
@@ -1,29 +0,0 @@
-process HAPLINK_SEQUENCES {
-    tag "${method}: ${prefix}"
-    label 'process_single'
-    label 'haplink'
-
-    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
-
-    input:
-    tuple val(prefix), val(method), path(yaml)
-    path reference
-
-    output:
-    tuple val(prefix), val(method), path("*.fasta")
-
-    publishDir "results/${method}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    """
-    export JULIA_NUM_THREADS=${task.cpus}
-    haplink sequences \\
-        "${reference}" \\
-        "${yaml}" \\
-        --prefix "${prefix}" \\
-        > "${prefix}.fasta"
-    """
-}
diff --git a/modules/haplink/variants/main.nf b/modules/haplink/variants/main.nf
deleted file mode 100644
index ec6e9c8..0000000
--- a/modules/haplink/variants/main.nf
+++ /dev/null
@@ -1,28 +0,0 @@
-process HAPLINK_VARIANTS {
-    tag "${prefix}"
-    label 'process_medium'
-    label 'haplink'
-
-    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
-
-    input:
-    tuple val(prefix), path(bam), path(bai)
-    path reference
-
-    output:
-    tuple val(prefix), path("*.vcf")
-
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    """
-    export JULIA_NUM_THREADS=${task.cpus}
-    haplink variants \\
-        "${reference}" \\
-        "${bam}" \\
-        > "${prefix}.vcf"
-    """
-}
diff --git a/modules/minimap2/main.nf b/modules/minimap2/main.nf
deleted file mode 100644
index 7d3c2aa..0000000
--- a/modules/minimap2/main.nf
+++ /dev/null
@@ -1,33 +0,0 @@
-process MINIMAP2 {
-    tag "${prefix}"
-    label 'process_medium'
-
-    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
-
-    input:
-    tuple val(prefix), path(reads)
-    path reference
-
-    publishDir "results", mode: 'copy'
-
-    output:
-    tuple val(prefix), path("*.bam"), path("*.bam.bai")
-    script:
-    """
-    minimap2 \\
-        -x map-ont \\
-        --MD \\
-        --eqx \\
-        -t ${task.cpus} \\
-        -a \\
-        "${reference}" \\
-        "${reads}" \\
-        | samtools sort \\
-        | samtools view \\
-            -@ ${task.cpus} \\
-            -b \\
-            -h \\
-            -o "${prefix}.bam"
-    samtools index "${prefix}.bam"
-    """
-}
diff --git a/modules/nanofilt/main.nf b/modules/nanofilt/main.nf
deleted file mode 100644
index c6ec93f..0000000
--- a/modules/nanofilt/main.nf
+++ /dev/null
@@ -1,28 +0,0 @@
-process NANOFILT {
-    tag "${prefix}"
-    label 'process_low'
-
-    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'
-
-    input:
-    tuple val(prefix), path(reads)
-
-    output:
-    tuple val(prefix), path("*_trimmed.fastq.gz")
-
-    script:
-    """
-    gzip \\
-        -cdf "${reads}" \\
-        | NanoFilt \\
-            --logfile "trimmed/${prefix}.nanofilt.log" \\
-            --length 100 \\
-            --quality 7 \\
-            --headcrop 30 \\
-            --tailcrop 30 \\
-            --minGC 0.1 \\
-            --maxGC 0.9 \\
-        | gzip \\
-        > "${prefix}_trimmed.fastq.gz"
-    """
-}
diff --git a/modules/predicthaplo/main.nf b/modules/predicthaplo/main.nf
deleted file mode 100644
index f403c0e..0000000
--- a/modules/predicthaplo/main.nf
+++ /dev/null
@@ -1,27 +0,0 @@
-process PREDICTHAPLO {
-    tag "${prefix}"
-    label 'process_high'
-    label 'predicthaplo'
-
-    container 'quay.io/biocontainers/predicthaplo:2.1.4--h9b88814_5'
-
-    input:
-    tuple val(prefix), path(bam), path(bai)
-    path(reference)
-
-    output:
-    tuple val(prefix), path("*.fa*")
-
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    """
-    predicthaplo \\
-        --sam "${bam}" \\
-        --reference "${reference}" \\
-        --prefix "${prefix}"
-    """
-}
diff --git a/modules/quasirecomb/main.nf b/modules/quasirecomb/main.nf
deleted file mode 100644
index 8e054cf..0000000
--- a/modules/quasirecomb/main.nf
+++ /dev/null
@@ -1,31 +0,0 @@
-process QUASIRECOMB {
-    tag "${prefix}"
-    label 'process_high'
-    label 'quasirecomb'
-
-    container 'quay.io/biocontainers/quasirecomb:1.2--hdfd78af_1'
-
-    input:
-    tuple val(prefix), path(bam), path(bai)
-
-    output:
-    tuple val(prefix), path("*.fasta")
-
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def jmemstring = task.memory.toMega() + 'M'
-    """
-    quasirecomb \\
-        -XX:+UseParallelGC \\
-        -Xms2g \\
-        -Xmx${jmemstring} \\
-        -XX:+UseNUMA \\
-        -XX:NewRatio=9 \\
-        -i "${bam}"
-    mv quasispecies.fasta "${prefix}.fasta"
-    """
-}
diff --git a/modules/shorah/amplicon/main.nf b/modules/shorah/amplicon/main.nf
deleted file mode 100644
index c8ccf38..0000000
--- a/modules/shorah/amplicon/main.nf
+++ /dev/null
@@ -1,28 +0,0 @@
-process SHORAH_AMPLICON {
-    tag "${prefix}"
-    label 'process_high'
-    label 'shorah'
-
-    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
-
-    input:
-    tuple val(prefix), path(bam), path(bai)
-    path(reference)
-
-    output:
-    tuple val(prefix), path("*.vcf")
-    tuple val(prefix), path("*support.fas")
-
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    """
-    shorah amplicon \\
-        -t ${task.cpus} \\
-        -f ${reference} \\
-        -b ${bam} \\
-    """
-}
diff --git a/modules/shorah/shotgun/main.nf b/modules/shorah/shotgun/main.nf
deleted file mode 100644
index 2b7aa86..0000000
--- a/modules/shorah/shotgun/main.nf
+++ /dev/null
@@ -1,28 +0,0 @@
-process SHORAH_SHOTGUN {
-    tag "${prefix}"
-    label 'process_high'
-    label 'shorah'
-
-    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
-
-    input:
-    tuple val(prefix), path(bam), path(bai)
-    path(reference)
-
-    output:
-    tuple val(prefix), path("*.vcf")
-    tuple val(prefix), path("*support.fas")
-
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    """
-    shorah shotgun \\
-        -t ${task.cpus} \\
-        -f ${reference} \\
-        -b ${bam} \\
-    """
-}
diff --git a/modules/viquas/main.nf b/modules/viquas/main.nf
index ad19953..16bb7b5 100644
--- a/modules/viquas/main.nf
+++ b/modules/viquas/main.nf
@@ -1,7 +1,6 @@
 process VIQUAS {
     tag "${prefix}"
     label 'process_high'
-    label 'viquas'
 
     container 'code.millironx.com/millironx/haplotyper-battle-royale:viquas'
 
@@ -12,10 +11,7 @@ process VIQUAS {
     output:
     tuple val(prefix), path("*.fa")
 
-    publishDir "results/${task.process}", mode: 'copy'
-
-    when:
-    task.ext.when == null || task.ext.when
+    publishDir "results/viquas", mode: 'copy'
 
     script:
     """
diff --git a/nextflow.config b/nextflow.config
index 98bf7b1..84597cb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,186 +1,11 @@
-params {
-    reference = null
-
-    // Config options
-    config_profile_name = null
-    config_profile_description = null
-    custom_config_version = 'master'
-    custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
-    config_profile_contact = null
-    config_profile_url = null
-
-    // Max resource options
-    // Defaults only, expecting to be overwritten
-    max_memory = '128.GB'
-    max_cpus = 16
-    max_time = '240.h'
-
-    // Haplotyper options
-    // By default, run all haplotypers
-    cliquesnv = true
-    haplink = true
-    predicthaplo = true
-    quasirecomb = true
-    shorah = true
-    viquas = true
-}
-
 process {
-    cpus = { check_max(1 * task.attempt, 'cpus') }
-    memory = { check_max(6.GB * task.attempt, 'memory') }
-    time = { check_max(4.h * task.attempt, 'time') }
-
-    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
-    maxRetries = 1
-    maxErrors = '-1'
-
-    withLabel:process_single {
-        cpus = { check_max(1, 'cpus') }
-        memory = { check_max( 6.GB * task.attempt, 'memory') }
-        time = { check_max( 4.h * task.attempt, 'time') }
-    }
-    withLabel:process_low {
-        cpus = { check_max( 2 * task.attempt, 'cpus') }
-        memory = { check_max( 12.GB * task.attempt, 'memory') }
-        time = { check_max( 4.h * task.attempt, 'time') }
-    }
-    withLabel:process_medium {
-        cpus = { check_max( 6 * task.attempt, 'cpus') }
-        memory = { check_max( 36.GB * task.attempt, 'memory') }
-        time = { check_max( 8.h * task.attempt, 'time') }
-    }
-    withLabel:process_high {
-        cpus = { check_max( 12 * task.attempt, 'cpus') }
-        memory = { check_max( 72.GB * task.attempt, 'memory') }
-        time = { check_max( 16.h * task.attempt, 'time') }
-    }
-
-    withName: 'HAPLINK_ML_HAPLOTYPES' {
-        ext.ml_args = """
-            --simulated-reads \\
-            --overlap-min 20 \\
-            --overlap-max 8000 \\
-        """
-    }
-
-    withLabel: 'cliquesnv' {
-        ext.when = params.cliquesnv
-    }
-    withLabel: 'haplink' {
-        ext.when = params.haplink
-    }
-    withLabel: 'predicthaplo' {
-        ext.when = params.predicthaplo
-    }
-    withLabel: 'quasirecomb' {
-        ext.when = params.quasirecomb
-    }
-    withLabel: 'shorah' {
-        ext.when = params.shorah
-    }
-    withLabel: 'viquas' {
-        ext.when = params.viquas
-    }
+    errorStrategy = 'finish'
+    time = '7d'
 }
 
-try {
-    includeConfig "${params.custom_config_base}/nfcore_custom.config"
-} catch (Exception e) {
-    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
-}
-
-profiles {
-    docker {
-        docker.enabled = true
-        docker.userEmulation = true
-        conda.enabled = false
-        singularity.enabled = false
-        podman.enabled = false
-        shifter.enabled = false
-        charliecloud.enabled = false
-        apptainer.enabled = false
-    }
-    singularity {
-        singularity.enabled = true
-        singularity.autoMounts = true
-        conda.enabled = false
-        docker.enabled = false
-        podman.enabled = false
-        shifter.enabled = false
-        charliecloud.enabled = false
-        apptainer.enabled = false
-    }
-    podman {
-        podman.enabled = true
-        conda.enabled = false
-        docker.enabled = false
-        singularity.enabled = false
-        shifter.enabled = false
-        charliecloud.enabled = false
-        apptainer.enabled = false
-    }
-    shifter {
-        shifter.enabled = true
-        conda.enabled = false
-        docker.enabled = false
-        singularity.enabled = false
-        podman.enabled = false
-        charliecloud.enabled = false
-        apptainer.enabled = false
-    }
-    charliecloud {
-        charliecloud.enabled = true
-        conda.enabled = false
-        docker.enabled = false
-        singularity.enabled = false
-        podman.enabled = false
-        shifter.enabled = false
-        apptainer.enabled = false
-    }
-    apptainer {
-        apptainer.enabled = true
-        apptainer.autoMounts = true
-        conda.enabled = false
-        docker.enabled = false
-        singularity.enabled = false
-        podman.enabled = false
-        shifter.enabled = false
-        charliecloud.enabled = false
-    }
-}
+singularity.enabled = true
 
 env {
     R_PROFILE_USER = "/.Rprofile"
     R_ENVIRON_USER = "/.Renviron"
 }
-
-def check_max(obj, type) {
-    if (type == 'memory') {
-        try {
-            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
-                return params.max_memory as nextflow.util.MemoryUnit
-            else
-                return obj
-        } catch (all) {
-            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'time') {
-        try {
-            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
-                return params.max_time as nextflow.util.Duration
-            else
-                return obj
-        } catch (all) {
-            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'cpus') {
-        try {
-            return Math.min( obj, params.max_cpus as int )
-        } catch (all) {
-            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
-            return obj
-        }
-    }
-}