From 76fc461670252883ac61cc099458178ce602d774 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 4 Aug 2021 10:14:56 +0200 Subject: [PATCH 1/2] Try new strategy for SGE/JAVA bs --- conf/pipeline/eager/eva.config | 236 +++++++++++++++++++++++++++++++-- 1 file changed, 224 insertions(+), 12 deletions(-) diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config index 359ca8b..197e930 100644 --- a/conf/pipeline/eager/eva.config +++ b/conf/pipeline/eager/eva.config @@ -69,7 +69,7 @@ process { } withName: adapter_removal { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 5)}G,virtual_free=${(task.memory.toGiga() * 5)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: dedup { @@ -77,11 +77,11 @@ process { } withName: markduplicates { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: library_merge { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: malt { @@ -97,7 +97,7 @@ process { } withName: mtnucratio { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 4)}G,virtual_free=${(task.memory.toGiga() * 4)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: vcf2genome { @@ -105,11 +105,11 @@ process { } withName: qualimap { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: damageprofiler { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: circularmapper { @@ -129,6 +129,145 @@ process { profiles { + medium_data { + + params { + // Specific nf-core/configs params + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_description = 'nf-core/eager medium-data EVA profile provided by nf-core/configs' + } + + executor { + queueSize = 8 + } + + process { + + beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -XX:+PrintCommandLineFlags"' + + maxRetries = 2 + + // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion + clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toGiga()}G,virtual_free=${task.memory.toGiga()}G" } + + withLabel:'sc_tiny'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 1.5.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_small'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_medium'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_small'{ + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_medium' { + cpus = { check_max( 4, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_large'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 24.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_huge'{ + cpus = { check_max( 32, 'cpus' ) } + memory = { check_max( 256.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use + + withName: makeSeqDict { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: fastqc { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: adapter_removal { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: dedup { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: markduplicates { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: library_merge { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: malt { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName:hostremoval_input_fastq { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = 1440.h + } + + withName: maltextract { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: multivcfanalyzer { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: mtnucratio { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: vcf2genome { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: qualimap { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: damageprofiler { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + } + + withName: circularmapper { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: circulargenerator { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: preseq { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' } + } + + } + } + big_data { params { @@ -143,13 +282,12 @@ profiles { process { - maxRetries = 2 + beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -XX:+PrintCommandLineFlags"' - withName:hostremoval_input_fastq { - cpus = { check_max( 1, 'cpus' ) } - memory = { check_max( 32.GB * task.attempt, 'memory' ) } - time = 1440.h - } + maxRetries = 2 + + // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion + clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toGiga()}G,virtual_free=${task.memory.toGiga()}G" } withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } @@ -192,6 +330,80 @@ profiles { memory = { check_max( 512.GB * task.attempt, 'memory' ) } time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } + + // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use + + withName: makeSeqDict { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: fastqc { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: adapter_removal { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: dedup { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: markduplicates { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: library_merge { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName:hostremoval_input_fastq { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = 1440.h + } + + withName: malt { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: maltextract { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: multivcfanalyzer { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: mtnucratio { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: vcf2genome { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: qualimap { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: damageprofiler { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: circularmapper { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: circulargenerator { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + } + + withName: preseq { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 6)}G,virtual_free=${(task.memory.toGiga() * 6)}G" } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' } + } + } } From b0e344846d68117aff7d06037db40813b1608a15 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 4 Aug 2021 10:37:49 +0200 Subject: [PATCH 2/2] Changes after code review from @aidaanva --- conf/pipeline/eager/eva.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config index 197e930..16e185a 100644 --- a/conf/pipeline/eager/eva.config +++ b/conf/pipeline/eager/eva.config @@ -253,11 +253,11 @@ profiles { } withName: circularmapper { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } } withName: circulargenerator { - clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 3)}G,virtual_free=${(task.memory.toGiga() * 3)}G" } } withName: preseq {