Compare commits

..

No commits in common. "b98a7977013b4e15934bc459abca325872aeb3f9" and "a91632a88329c4a4c9682bcea48168840e8d487a" have entirely different histories.

14 changed files with 235 additions and 539 deletions

268
main.nf
View file

@ -1,17 +1,5 @@
#!/usr/bin/env nextflow
include { CLIQUESNV } from './modules/cliquesnv'
include { EFETCH } from './modules/efetch'
include { HAPLINK_HAPLOTYPES as HAPLINK_ML_HAPLOTYPES } from './modules/haplink/haplotypes'
include { HAPLINK_HAPLOTYPES as HAPLINK_RAW_HAPLOTYPES } from './modules/haplink/haplotypes'
include { HAPLINK_SEQUENCES } from './modules/haplink/sequences'
include { HAPLINK_VARIANTS } from './modules/haplink/variants'
include { PREDICTHAPLO } from './modules/predicthaplo'
include { MINIMAP2 } from './modules/minimap2'
include { NANOFILT } from './modules/nanofilt'
include { QUASIRECOMB } from './modules/quasirecomb'
include { SHORAH_AMPLICON } from './modules/shorah/amplicon'
include { SHORAH_SHOTGUN } from './modules/shorah/shotgun'
include { VIQUAS } from './modules/viquas'
workflow {
@ -21,7 +9,7 @@ workflow {
.map { file -> tuple(file.simpleName, file) }
.set { ch_input }
EFETCH('NC_036618.1')
EFETCH()
EFETCH
.out
.set { ch_reference }
@ -36,11 +24,6 @@ workflow {
.out
.set { ch_alignments }
CLIQUESNV(
ch_alignments,
'snv-pacbio'
)
HAPLINK_VARIANTS( ch_alignments, ch_reference )
HAPLINK_VARIANTS
.out
@ -57,7 +40,7 @@ workflow {
)
HAPLINK_RAW_HAPLOTYPES
.out
.map{ [ it[0], 'HAPLINK_RAW_HAPLOTYPES', it[1] ] }
.map{ [ it[0], 'raw', it[1] ] }
.set{ ch_raw_haplotypes }
HAPLINK_ML_HAPLOTYPES(
@ -66,7 +49,7 @@ workflow {
)
HAPLINK_ML_HAPLOTYPES
.out
.map{ [ it[0], 'HAPLINK_ML_HAPLOTYPES', it[1] ] }
.map{ [ it[0], 'ml', it[1] ] }
.set{ ch_ml_haplotypes }
ch_raw_haplotypes
@ -78,25 +61,236 @@ workflow {
ch_reference
)
PREDICTHAPLO(
ch_alignments,
ch_reference
)
QUASIRECOMB( ch_alignments )
SHORAH_AMPLICON(
ch_alignments,
ch_reference
)
SHORAH_SHOTGUN(
ch_alignments,
ch_reference
)
VIQUAS(
ch_alignments,
ch_reference
)
}
// Fetch the reference sequence for accession NC_036618.1 from the Entrez
// `nucleotide` database and publish it as `idv4.fasta`.
process EFETCH {
    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'
    publishDir "results", mode: 'copy'
    memory '256.MB'
    cpus 1

    output:
    path 'idv4.fasta'

    script:
    // esearch resolves the accession; efetch streams the record as FASTA.
    """
    esearch \\
    -db nucleotide \\
    -query "NC_036618.1" \\
    | efetch \\
    -format fasta \\
    > idv4.fasta
    """
}
// Quality- and length-filter reads with NanoFilt.
//
// Input:  tuple of (sample prefix, reads file readable by `gzip -cdf`).
// Output: tuple of (sample prefix, `<prefix>_trimmed.fastq.gz`).
process NANOFILT {
    cpus 1
    memory '8.GB'
    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'

    input:
    tuple val(prefix), path(reads)

    output:
    tuple val(prefix), path("*_trimmed.fastq.gz")

    script:
    """
    # The log file lives under trimmed/, which does not exist in a fresh task
    # work directory; create it so NanoFilt can open the log.
    mkdir -p trimmed

    gzip \\
        -cdf "${reads}" \\
        | NanoFilt \\
            --logfile "trimmed/${prefix}.nanofilt.log" \\
            --length 100 \\
            --quality 7 \\
            --headcrop 30 \\
            --tailcrop 30 \\
            --minGC 0.1 \\
            --maxGC 0.9 \\
        | gzip \\
        > "${prefix}_trimmed.fastq.gz"
    """
}
// Align reads to the reference with minimap2 (map-ont preset) and produce a
// coordinate-sorted, indexed BAM.
//
// Input:  tuple of (sample prefix, reads) plus the reference FASTA.
// Output: tuple of (sample prefix, `<prefix>.bam`, `<prefix>.bam.bai`).
process MINIMAP2 {
    cpus 4
    memory '8.GB'
    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
    // Directives belong ahead of the input/output sections.
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(reads)
    path reference

    output:
    tuple val(prefix), path("*.bam"), path("*.bam.bai")

    script:
    """
    # samtools sort already emits BAM, so write the final file directly and
    # give the sort step the allocated threads instead of piping through a
    # second `samtools view` conversion.
    minimap2 \\
        -x map-ont \\
        --MD \\
        --eqx \\
        -t ${task.cpus} \\
        -a \\
        "${reference}" \\
        "${reads}" \\
        | samtools sort \\
            -@ ${task.cpus} \\
            -o "${prefix}.bam" \\
            -

    samtools index "${prefix}.bam"
    """
}
// Run ShoRAH in amplicon mode on one alignment.
//
// Input:  tuple of (sample prefix, BAM, BAM index) plus the reference FASTA.
// Output: (prefix, *.vcf) and (prefix, *support.fas).
process SHORAH_AMPLICON {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Directives belong ahead of the input/output sections.
    publishDir "results/shorah-amplicon", mode: 'copy'

    input:
    // MINIMAP2 emits three-element tuples; declaring the index here keeps the
    // tuple cardinality in sync and stages the .bai next to the BAM.
    tuple val(prefix), path(bam), path(bai)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    // No trailing line continuation after the last argument.
    """
    shorah amplicon \\
        -t ${task.cpus} \\
        -f "${reference}" \\
        -b "${bam}"
    """
}
// Run ShoRAH in shotgun mode on one alignment.
//
// Input:  tuple of (sample prefix, BAM, BAM index) plus the reference FASTA.
// Output: (prefix, *.vcf) and (prefix, *support.fas).
process SHORAH_SHOTGUN {
    label 'process_high'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Directives belong ahead of the input/output sections.
    publishDir "results/shorah-shotgun", mode: 'copy'

    input:
    // MINIMAP2 emits three-element tuples; declaring the index here keeps the
    // tuple cardinality in sync and stages the .bai next to the BAM.
    tuple val(prefix), path(bam), path(bai)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    script:
    // No trailing line continuation after the last argument.
    """
    shorah shotgun \\
        -t ${task.cpus} \\
        -f "${reference}" \\
        -b "${bam}"
    """
}
// Call variants with `haplink variants`, writing the result to `<prefix>.vcf`.
//
// Input:  tuple of (sample prefix, BAM, BAM index) plus the reference FASTA.
// Output: tuple of (sample prefix, VCF).
process HAPLINK_VARIANTS {
    cpus 2
    memory '12.GB'
    // Directives belong ahead of the input/output sections.
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    path reference

    output:
    tuple val(prefix), path("*.vcf")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink variants \\
        "${reference}" \\
        "${bam}" \\
        > "${prefix}.vcf"
    """
}
// Call haplotypes with `haplink haplotypes` using only the --frequency 0.01
// option, writing the result to `<prefix>.yaml`.
//
// Input:  tuple of (sample prefix, BAM, BAM index, VCF) plus the reference.
// Output: tuple of (sample prefix, YAML).
process HAPLINK_RAW_HAPLOTYPES {
    cpus 2
    memory '12.GB'
    // Directives belong ahead of the input/output sections.
    publishDir "results/raw-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}
// Call haplotypes with `haplink haplotypes` using the simulated-reads options
// (--simulated-reads with overlap bounds 20..8000), writing `<prefix>.yaml`.
//
// Input:  tuple of (sample prefix, BAM, BAM index, VCF) plus the reference.
// Output: tuple of (sample prefix, YAML).
process HAPLINK_ML_HAPLOTYPES {
    cpus 8
    memory '12.GB'
    // Directives belong ahead of the input/output sections.
    publishDir "results/ml-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --simulated-reads \\
        --overlap-min 20 \\
        --overlap-max 8000 \\
        --frequency 0.01 \\
        > "${prefix}.yaml"
    """
}
// Convert a haplotype YAML into FASTA with `haplink sequences`.
//
// Input:  tuple of (sample prefix, method tag, YAML) plus the reference.
// Output: tuple of (sample prefix, method tag, `<prefix>.fasta`).
process HAPLINK_SEQUENCES {
    cpus 1
    memory '6.GB'
    // Directives belong ahead of the input/output sections. `method` comes
    // from the input tuple and is resolved per task.
    publishDir "results/${method}-haplotypes", mode: 'copy'

    input:
    tuple val(prefix), val(method), path(yaml)
    path reference

    output:
    tuple val(prefix), val(method), path("*.fasta")

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink sequences \\
        "${reference}" \\
        "${yaml}" \\
        --prefix "${prefix}" \\
        > "${prefix}.fasta"
    """
}

View file

@ -1,31 +0,0 @@
// Run CliqueSNV on one alignment with the given method (-m).
//
// Input:  tuple of (sample prefix, BAM, BAM index) and the method name.
// Output: (prefix, *.json) and (prefix, *.fasta), written to the task dir.
process CLIQUESNV {
    tag "${prefix}"
    label 'process_high'
    label 'cliquesnv'
    container 'quay.io/biocontainers/cliquesnv:2.0.3--hdfd78af_0'
    // Directives belong ahead of the input/output sections.
    publishDir "results/${task.process}", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    val(method)

    output:
    tuple val(prefix), path("*.json")
    tuple val(prefix), path("*.fasta")

    when:
    task.ext.when == null || task.ext.when

    script:
    // Cap the JVM heap at the memory allocated to the task.
    def jmemstring = task.memory.toMega() + 'M'
    """
    cliquesnv \\
        -Xmx${jmemstring} \\
        -threads ${task.cpus} \\
        -m '${method}' \\
        -in "${bam}" \\
        -outDir .
    """
}

View file

@ -1,24 +0,0 @@
// Fetch a sequence from the Entrez `nucleotide` database and publish it as
// `reference.fasta`.
//
// Input:  `genome` - the query string passed to esearch.
// Output: `reference.fasta`.
process EFETCH {
    tag "${genome}"
    label 'process_single'
    container 'quay.io/biocontainers/entrez-direct:16.2--he881be0_1'
    // Directives belong ahead of the input/output sections.
    publishDir "results", mode: 'copy'

    input:
    val(genome)

    output:
    path 'reference.fasta'

    script:
    """
    esearch \\
        -db nucleotide \\
        -query "${genome}" \\
        | efetch \\
            -format fasta \\
        > reference.fasta
    """
}

View file

@ -1,32 +0,0 @@
// Call haplotypes with `haplink haplotypes`, writing `<prefix>.yaml`.
// Additional command-line flags can be supplied through `task.ext.ml_args`.
//
// Input:  tuple of (sample prefix, BAM, BAM index, VCF) plus the reference.
// Output: tuple of (sample prefix, YAML).
process HAPLINK_HAPLOTYPES {
    tag "${prefix}"
    label 'process_high'
    label 'haplink'
    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
    // Directives belong ahead of the input/output sections.
    publishDir "results/${task.process}", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai), path(vcf)
    path reference

    output:
    tuple val(prefix), path("*.yaml")

    when:
    task.ext.when == null || task.ext.when

    script:
    // Flatten the extra flags onto a single line. A multi-line value (leading
    // newline, backslash continuations) would otherwise end the command at the
    // blank line or inject a stray escaped-space argument before the redirect.
    def ml_args = (task.ext.ml_args ?: '')
        .toString()
        .replaceAll(/\\\r?\n/, ' ')
        .replaceAll(/\s+/, ' ')
        .trim()
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink haplotypes \\
        "${reference}" \\
        "${vcf}" \\
        "${bam}" \\
        --frequency 0.01 \\
        ${ml_args} \\
        > "${prefix}.yaml"
    """
}

View file

@ -1,29 +0,0 @@
// Convert a haplotype YAML into FASTA with `haplink sequences`.
//
// Input:  tuple of (sample prefix, method tag, YAML) plus the reference.
// Output: tuple of (sample prefix, method tag, `<prefix>.fasta`).
process HAPLINK_SEQUENCES {
    tag "${method}: ${prefix}"
    label 'process_single'
    label 'haplink'
    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
    // Directives belong ahead of the input/output sections. `method` comes
    // from the input tuple and is resolved per task.
    publishDir "results/${method}", mode: 'copy'

    input:
    tuple val(prefix), val(method), path(yaml)
    path reference

    output:
    tuple val(prefix), val(method), path("*.fasta")

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink sequences \\
        "${reference}" \\
        "${yaml}" \\
        --prefix "${prefix}" \\
        > "${prefix}.fasta"
    """
}

View file

@ -1,28 +0,0 @@
// Call variants with `haplink variants`, writing the result to `<prefix>.vcf`.
//
// Input:  tuple of (sample prefix, BAM, BAM index) plus the reference FASTA.
// Output: tuple of (sample prefix, VCF).
process HAPLINK_VARIANTS {
    tag "${prefix}"
    label 'process_medium'
    label 'haplink'
    container 'quay.io/biocontainers/haplink:1.0.0--h031d066_0'
    // Directives belong ahead of the input/output sections.
    publishDir "results/${task.process}", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    path reference

    output:
    tuple val(prefix), path("*.vcf")

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    export JULIA_NUM_THREADS=${task.cpus}
    haplink variants \\
        "${reference}" \\
        "${bam}" \\
        > "${prefix}.vcf"
    """
}

View file

@ -1,33 +0,0 @@
// Align reads to the reference with minimap2 (map-ont preset) and produce a
// coordinate-sorted, indexed BAM.
//
// Input:  tuple of (sample prefix, reads) plus the reference FASTA.
// Output: tuple of (sample prefix, `<prefix>.bam`, `<prefix>.bam.bai`).
process MINIMAP2 {
    tag "${prefix}"
    label 'process_medium'
    container 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0'
    // Directives belong ahead of the input/output sections.
    publishDir "results", mode: 'copy'

    input:
    tuple val(prefix), path(reads)
    path reference

    output:
    tuple val(prefix), path("*.bam"), path("*.bam.bai")

    script:
    """
    # samtools sort already emits BAM, so write the final file directly and
    # give the sort step the allocated threads instead of piping through a
    # second `samtools view` conversion.
    minimap2 \\
        -x map-ont \\
        --MD \\
        --eqx \\
        -t ${task.cpus} \\
        -a \\
        "${reference}" \\
        "${reads}" \\
        | samtools sort \\
            -@ ${task.cpus} \\
            -o "${prefix}.bam" \\
            -

    samtools index "${prefix}.bam"
    """
}

View file

@ -1,28 +0,0 @@
// Quality- and length-filter reads with NanoFilt.
//
// Input:  tuple of (sample prefix, reads file readable by `gzip -cdf`).
// Output: tuple of (sample prefix, `<prefix>_trimmed.fastq.gz`).
process NANOFILT {
    tag "${prefix}"
    label 'process_low'
    container 'quay.io/biocontainers/nanofilt:2.8.0--py_0'

    input:
    tuple val(prefix), path(reads)

    output:
    tuple val(prefix), path("*_trimmed.fastq.gz")

    script:
    """
    # The log file lives under trimmed/, which does not exist in a fresh task
    # work directory; create it so NanoFilt can open the log.
    mkdir -p trimmed

    gzip \\
        -cdf "${reads}" \\
        | NanoFilt \\
            --logfile "trimmed/${prefix}.nanofilt.log" \\
            --length 100 \\
            --quality 7 \\
            --headcrop 30 \\
            --tailcrop 30 \\
            --minGC 0.1 \\
            --maxGC 0.9 \\
        | gzip \\
        > "${prefix}_trimmed.fastq.gz"
    """
}

View file

@ -1,27 +0,0 @@
// Run PredictHaplo on one alignment.
//
// Input:  tuple of (sample prefix, BAM, BAM index) plus the reference FASTA.
// Output: tuple of (sample prefix, files matching *.fa*).
process PREDICTHAPLO {
    tag "${prefix}"
    label 'process_high'
    label 'predicthaplo'
    container 'quay.io/biocontainers/predicthaplo:2.1.4--h9b88814_5'
    // Directives belong ahead of the input/output sections.
    publishDir "results/${task.process}", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    path(reference)

    output:
    tuple val(prefix), path("*.fa*")

    when:
    task.ext.when == null || task.ext.when

    script:
    // NOTE(review): the option is named --sam but receives the BAM staged
    // from the alignment channel; confirm predicthaplo accepts BAM input or
    // convert to SAM upstream.
    """
    predicthaplo \\
        --sam "${bam}" \\
        --reference "${reference}" \\
        --prefix "${prefix}"
    """
}

View file

@ -1,31 +0,0 @@
// Run QuasiRecomb on one alignment and rename its `quasispecies.fasta`
// result to `<prefix>.fasta`.
//
// Input:  tuple of (sample prefix, BAM, BAM index).
// Output: tuple of (sample prefix, FASTA).
process QUASIRECOMB {
    tag "${prefix}"
    label 'process_high'
    label 'quasirecomb'
    container 'quay.io/biocontainers/quasirecomb:1.2--hdfd78af_1'
    // Directives belong ahead of the input/output sections.
    publishDir "results/${task.process}", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)

    output:
    tuple val(prefix), path("*.fasta")

    when:
    task.ext.when == null || task.ext.when

    script:
    // Cap the JVM heap at the memory allocated to the task.
    def jmemstring = task.memory.toMega() + 'M'
    """
    quasirecomb \\
        -XX:+UseParallelGC \\
        -Xms2g \\
        -Xmx${jmemstring} \\
        -XX:+UseNUMA \\
        -XX:NewRatio=9 \\
        -i "${bam}"

    mv quasispecies.fasta "${prefix}.fasta"
    """
}

View file

@ -1,28 +0,0 @@
// Run ShoRAH in amplicon mode on one alignment.
//
// Input:  tuple of (sample prefix, BAM, BAM index) plus the reference FASTA.
// Output: (prefix, *.vcf) and (prefix, *support.fas).
process SHORAH_AMPLICON {
    tag "${prefix}"
    label 'process_high'
    label 'shorah'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Directives belong ahead of the input/output sections.
    publishDir "results/${task.process}", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    when:
    task.ext.when == null || task.ext.when

    script:
    // No trailing line continuation after the last argument.
    """
    shorah amplicon \\
        -t ${task.cpus} \\
        -f "${reference}" \\
        -b "${bam}"
    """
}

View file

@ -1,28 +0,0 @@
// Run ShoRAH in shotgun mode on one alignment.
//
// Input:  tuple of (sample prefix, BAM, BAM index) plus the reference FASTA.
// Output: (prefix, *.vcf) and (prefix, *support.fas).
process SHORAH_SHOTGUN {
    tag "${prefix}"
    label 'process_high'
    label 'shorah'
    container 'quay.io/biocontainers/shorah:1.99.2--py38h73782ee_8'
    // Directives belong ahead of the input/output sections.
    publishDir "results/${task.process}", mode: 'copy'

    input:
    tuple val(prefix), path(bam), path(bai)
    path(reference)

    output:
    tuple val(prefix), path("*.vcf")
    tuple val(prefix), path("*support.fas")

    when:
    task.ext.when == null || task.ext.when

    script:
    // No trailing line continuation after the last argument.
    """
    shorah shotgun \\
        -t ${task.cpus} \\
        -f "${reference}" \\
        -b "${bam}"
    """
}

View file

@ -1,7 +1,6 @@
process VIQUAS {
tag "${prefix}"
label 'process_high'
label 'viquas'
container 'code.millironx.com/millironx/haplotyper-battle-royale:viquas'
@ -12,10 +11,7 @@ process VIQUAS {
output:
tuple val(prefix), path("*.fa")
publishDir "results/${task.process}", mode: 'copy'
when:
task.ext.when == null || task.ext.when
publishDir "results/viquas", mode: 'copy'
script:
"""

View file

@ -1,186 +1,11 @@
// Pipeline parameters; every value can be overridden on the command line or
// in a user-supplied config.
params {

    reference                  = null

    // Settings consumed when loading the shared nf-core config profiles
    config_profile_name        = null
    config_profile_description = null
    custom_config_version      = 'master'
    custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
    config_profile_contact     = null
    config_profile_url         = null

    // Upper bounds applied by check_max(); defaults are meant to be overridden
    max_memory                 = '128.GB'
    max_cpus                   = 16
    max_time                   = '240.h'

    // Per-haplotyper on/off switches; everything runs unless disabled
    cliquesnv                  = true
    haplink                    = true
    predicthaplo               = true
    quasirecomb                = true
    shorah                     = true
    viquas                     = true
}
// Default resources, retry policy and per-label overrides for all processes.
process {

    // Baseline for unlabelled processes; each value grows with the attempt
    // number and is capped by check_max().
    cpus   = { check_max(1 * task.attempt, 'cpus') }
    memory = { check_max(6.GB * task.attempt, 'memory') }
    time   = { check_max(4.h * task.attempt, 'time') }

    // Retry on exit codes 130-145 and 104; otherwise let running tasks finish.
    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
    maxRetries    = 1
    maxErrors     = '-1'

    withLabel:process_single {
        cpus   = { check_max(1, 'cpus') }
        memory = { check_max( 6.GB * task.attempt, 'memory') }
        time   = { check_max( 4.h * task.attempt, 'time') }
    }
    withLabel:process_low {
        cpus   = { check_max( 2 * task.attempt, 'cpus') }
        memory = { check_max( 12.GB * task.attempt, 'memory') }
        time   = { check_max( 4.h * task.attempt, 'time') }
    }
    withLabel:process_medium {
        cpus   = { check_max( 6 * task.attempt, 'cpus') }
        memory = { check_max( 36.GB * task.attempt, 'memory') }
        time   = { check_max( 8.h * task.attempt, 'time') }
    }
    withLabel:process_high {
        cpus   = { check_max( 12 * task.attempt, 'cpus') }
        memory = { check_max( 72.GB * task.attempt, 'memory') }
        time   = { check_max( 16.h * task.attempt, 'time') }
    }

    withName: 'HAPLINK_ML_HAPLOTYPES' {
        // Keep the flags on one line: this value is spliced into the middle of
        // a backslash-continued shell command, where a leading newline ends
        // the command early and a trailing backslash escapes the next space.
        ext.ml_args = '--simulated-reads --overlap-min 20 --overlap-max 8000'
    }

    // Gate each haplotyper on its params.<tool> switch via the `when:` guard
    // in the corresponding modules.
    withLabel: 'cliquesnv' {
        ext.when = params.cliquesnv
    }
    withLabel: 'haplink' {
        ext.when = params.haplink
    }
    withLabel: 'predicthaplo' {
        ext.when = params.predicthaplo
    }
    withLabel: 'quasirecomb' {
        ext.when = params.quasirecomb
    }
    withLabel: 'shorah' {
        ext.when = params.shorah
    }
    withLabel: 'viquas' {
        ext.when = params.viquas
    }

    // NOTE(review): these two assignments repeat settings already made at the
    // top of this scope and, being later, replace the dynamic errorStrategy
    // and baseline time defined there. Confirm which pair is intended.
    errorStrategy = 'finish'
    time = '7d'
}
// Pull in the shared nf-core institutional profiles; a failure to load them
// is reported on stderr and otherwise ignored.
try {
    includeConfig "${params.custom_config_base}/nfcore_custom.config"
}
catch (Exception loadError) {
    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}
// One profile per container engine. Each profile switches its own engine on
// and explicitly switches conda and every other engine off.
profiles {

    docker {
        docker.enabled         = true
        docker.userEmulation   = true

        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true

        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
    }

    podman {
        podman.enabled         = true

        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    shifter {
        shifter.enabled        = true

        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        singularity.enabled    = false
    }

    charliecloud {
        charliecloud.enabled   = true

        apptainer.enabled      = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    apptainer {
        apptainer.enabled      = true
        apptainer.autoMounts   = true

        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }
}
// Top-level default: run containers with Singularity.
singularity.enabled = true

// Environment exported to every task: point R at fixed startup files.
// NOTE(review): presumably this keeps host-side R user settings out of the
// containers - confirm.
env.R_PROFILE_USER = "/.Rprofile"
env.R_ENVIRON_USER = "/.Renviron"
/**
 * Clamp a requested resource to the matching params.max_* limit.
 *
 * @param obj   the requested value (memory, time or cpu count)
 * @param type  one of 'memory', 'time' or 'cpus'
 * @return      the smaller of `obj` and the configured maximum; `obj`
 *              unchanged (after printing an error) when the configured
 *              maximum cannot be interpreted; null for any other `type`
 */
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            def memoryCap = params.max_memory as nextflow.util.MemoryUnit
            return obj.compareTo(memoryCap) == 1 ? memoryCap : obj
        } catch (all) {
            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    }

    if (type == 'time') {
        try {
            def timeCap = params.max_time as nextflow.util.Duration
            return obj.compareTo(timeCap) == 1 ? timeCap : obj
        } catch (all) {
            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    }

    if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }

    // Unrecognised resource type: nothing to clamp.
    return null
}