From 98eacf030d8090b53133751f6d22de3380632429 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 8 Jun 2022 16:27:48 +0200 Subject: [PATCH 1/9] fix for singularity cache for vsc_ugent profile --- conf/vsc_ugent.config | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index f2558fc..6c79590 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -19,6 +19,10 @@ singularity { cacheDir = "$scratch_dir/singularity" } +env { + SINGULARITY_CACHEDIR=$scratch_dir/.singularity +} + // Define profiles for each cluster profiles { skitty { @@ -35,7 +39,6 @@ profiles { executor = 'slurm' queue = 'skitty' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -54,7 +57,6 @@ profiles { executor = 'slurm' queue = 'swalot' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -73,7 +75,6 @@ profiles { executor = 'slurm' queue = 'victini' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -92,7 +93,6 @@ profiles { executor = 'slurm' queue = 'kirlia' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -111,7 +111,6 @@ profiles { executor = 'slurm' queue = 'doduo' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } From b6d38859f0e5ca95a5a18fc362e3386d2de0c56b Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 8 Jun 2022 16:28:28 +0200 Subject: [PATCH 2/9] fix for singularity cache for vsc_ugent profile --- conf/vsc_ugent.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index 6c79590..6875f19 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -20,7 +20,7 @@ singularity { } env { - SINGULARITY_CACHEDIR=$scratch_dir/.singularity + SINGULARITY_CACHEDIR="$scratch_dir/.singularity" } // Define profiles for each cluster From 2898736cbe223c19d0cc09c1e779783cd6ccc942 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 8 Jun 2022 14:16:52 -0700 Subject: [PATCH 3/9] Add Sage Bionetworks nf-core configuration --- .github/workflows/main.yml | 1 + README.md | 1 + conf/sage.config | 58 ++++++++++++++++++++++++++++++++++++++ docs/sage.md | 24 ++++++++++++++++ nfcore_custom.config | 1 + 5 files changed, 85 insertions(+) create mode 100644 conf/sage.config create mode 100644 docs/sage.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac31dfe..c142eb3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,6 +77,7 @@ jobs: - "phoenix" - "prince" - "rosalind" + - "sage" - "sahmri" - "sanger" - "seg_globe" diff --git a/README.md b/README.md index 1f37d29..978e606 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ Currently documentation is available for the following systems: - [PHOENIX](docs/phoenix.md) - [PRINCE](docs/prince.md) - [ROSALIND](docs/rosalind.md) +- [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) - [SEG_GLOBE](docs/seg_globe.md) - [UCT_HPC](docs/uct_hpc.md) diff --git a/conf/sage.config b/conf/sage.config new file mode 100644 index 0000000..5581d85 --- /dev/null +++ b/conf/sage.config @@ -0,0 +1,58 @@ +params { + config_profile_description = 'The Sage Bionetworks profile' + config_profile_contact = 'Bruno Grande 
(@BrunoGrandePhD)'
+    config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'
+}
+
+process {
+
+    cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) }
+    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
+    time = { check_max( 24.h * task.attempt, 'time' ) }
+
+    errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }
+    maxRetries = 5
+    maxErrors = '-1'
+
+    // Process-specific resource requirements
+    withLabel:process_low {
+        cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) }
+        memory = { check_max( 12.GB * task.attempt, 'memory' ) }
+        time = { check_max( 24.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_medium {
+        cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) }
+        memory = { check_max( 36.GB * task.attempt, 'memory' ) }
+        time = { check_max( 48.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_high {
+        cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) }
+        memory = { check_max( 72.GB * task.attempt, 'memory' ) }
+        time = { check_max( 96.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_long {
+        time = { check_max( 192.h * task.attempt, 'time' ) }
+    }
+    withLabel:process_high_memory {
+        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+    }
+
+}
+
+aws {
+    region = "us-east-1"
+}
+
+params {
+    igenomes_base = 's3://sage-igenomes/igenomes'
+    max_memory = 512.GB
+    max_cpus = 64
+    max_time = 168.h // One week
+}
+
+// Function to slow the increase of the resource multiplier
+// as attempts are made. The rationale is that some CPUs
+// don't need to be increased as fast as memory.
+def slow(attempt, factor = 2) {
+    return Math.ceil( attempt / factor) as int
+}
diff --git a/docs/sage.md b/docs/sage.md
new file mode 100644
index 0000000..755e0c2
--- /dev/null
+++ b/docs/sage.md
@@ -0,0 +1,24 @@
+# nf-core/configs: Sage Bionetworks Configuration
+
+To use this custom configuration, run the pipeline with `-profile sage`. This will download and launch the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). These include:
+
+- Updating the default value for `igenomes_base` to `s3://sage-igenomes`
+- Increasing the default time limits because we run pipelines on AWS
+- Enabling retries by default when exit codes relate to insufficient memory
+- Allow pending jobs to finish if the number of retries are exhausted
+- Slowing the increase in the number of allocated CPU cores on retries
+
+## Additional information about iGenomes
+
+The following iGenomes prefixes have been copied from `s3://ngi-igenomes/` (`eu-west-1`) to `s3://sage-igenomes` (`us-east-1`). See [this script](https://github.com/Sage-Bionetworks-Workflows/nextflow-infra/blob/main/bin/mirror-igenomes.sh) for more information. The `sage-igenomes` S3 bucket has been configured to be openly available, but files cannot be downloaded out of `us-east-1` to avoid egress charges. You can check the `conf/igenomes.config` file in each nf-core pipeline to figure out the mapping between genome IDs (_i.e._ for `--genome`) and iGenomes prefixes ([example](https://github.com/nf-core/rnaseq/blob/89bf536ce4faa98b4d50a8ec0a0343780bc62e0a/conf/igenomes.config#L14-L26)).
+
+- **Human Genome Builds**
+  - `Homo_sapiens/Ensembl/GRCh37`
+  - `Homo_sapiens/GATK/GRCh37`
+  - `Homo_sapiens/UCSC/hg19`
+  - `Homo_sapiens/GATK/GRCh38`
+  - `Homo_sapiens/NCBI/GRCh38`
+  - `Homo_sapiens/UCSC/hg38`
+- **Mouse Genome Builds**
+  - `Mus_musculus/Ensembl/GRCm38`
+  - `Mus_musculus/UCSC/mm10`
diff --git a/nfcore_custom.config b/nfcore_custom.config
index 88e7227..4b44d8a 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -59,6 +59,7 @@ profiles {
     phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" }
     prince { includeConfig "${params.custom_config_base}/conf/prince.config" }
     rosalind { includeConfig "${params.custom_config_base}/conf/rosalind.config" }
+    sage { includeConfig "${params.custom_config_base}/conf/sage.config" }
     sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" }
     sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"}
     seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"}
From c7093dee1306bd24bbdc6200097fcc4fccfda2d0 Mon Sep 17 00:00:00 2001
From: Bruno Grande
Date: Thu, 16 Jun 2022 12:37:32 -0700
Subject: [PATCH 4/9] Reduce max_memory to fit on an instance

---
 conf/sage.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/sage.config b/conf/sage.config
index 5581d85..da1db72 100644
--- a/conf/sage.config
+++ b/conf/sage.config
@@ -45,7 +45,7 @@ aws {
 
 params {
     igenomes_base = 's3://sage-igenomes/igenomes'
-    max_memory = 512.GB
+    max_memory = 500.GB
     max_cpus = 64
     max_time = 168.h // One week
 }
From ecd898584dc30f42612b44634e618271550a7cfb Mon Sep 17 00:00:00 2001
From: Bruno Grande
Date: Wed, 22 Jun 2022 13:54:13 -0700
Subject: [PATCH 5/9] Handle Sarek-specific `max` labels

---
 conf/sage.config | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/conf/sage.config b/conf/sage.config
index da1db72..d2e9d21 100644
--- a/conf/sage.config
+++ b/conf/sage.config
@@ -34,7 +34,15 @@ process {
         time = { check_max( 192.h * task.attempt, 'time' ) }
     }
     withLabel:process_high_memory {
-        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+        memory = { check_max( 128.GB * task.attempt, 'memory' ) }
+    }
+
+    // Preventing Sarek labels from using the actual maximums
+    withLabel:max_memory {
+        memory = { check_max( 128.GB * task.attempt, 'memory' ) }
+    }
+    withLabel:cpus_max {
+        cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) }
     }
 
 }
From 37121faf4a6da6e3130defdf7eb00a0dd113010a Mon Sep 17 00:00:00 2001
From: Bruno Grande
Date: Wed, 22 Jun 2022 14:37:43 -0700
Subject: [PATCH 6/9] Fix typo with label name

---
 conf/sage.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/sage.config b/conf/sage.config
index d2e9d21..3b2a423 100644
--- a/conf/sage.config
+++ b/conf/sage.config
@@ -38,7 +38,7 @@ process {
     }
 
     // Preventing Sarek labels from using the actual maximums
-    withLabel:max_memory {
+    withLabel:memory_max {
         memory = { check_max( 128.GB * task.attempt, 'memory' ) }
     }
     withLabel:cpus_max {
From 1c7230cb08791b93bcca612836b5d68afade8b66 Mon Sep 17 00:00:00 2001
From: Bruno Grande
Date: Thu, 23 Jun 2022 09:15:42 -0700
Subject: [PATCH 7/9] Add Sage-specific Sarek config

---
 README.md                       |  2 +-
 conf/pipeline/sarek/sage.config | 37 +++++++++++++++++++++++++++++++++
 docs/pipeline/sarek/sage.md     |  7 +++++++
 docs/sage.md                    | 14 +++++++------
 pipeline/sarek.config           |  3 ++-
 5 files changed, 55 insertions(+), 8 deletions(-)
 create mode 100644 conf/pipeline/sarek/sage.config
 create mode 100644 docs/pipeline/sarek/sage.md

diff
--git a/README.md b/README.md index 978e606..0d561fc 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,6 @@ A repository for hosting Nextflow configuration files containing custom paramete - [Configuration and parameters](#configuration-and-parameters) - [Offline usage](#offline-usage) - [Adding a new config](#adding-a-new-config) - - [Checking user hostnames](#checking-user-hostnames) - [Testing](#testing) - [Documentation](#documentation) - [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) @@ -203,6 +202,7 @@ Currently documentation is available for the following pipelines within specific - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek - [MUNIN](docs/pipeline/sarek/munin.md) + - [SAGE BIONETWORKS](docs/pipeline/sarek/sage.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler - [EVA](docs/pipeline/taxprofiler/eva.md) diff --git a/conf/pipeline/sarek/sage.config b/conf/pipeline/sarek/sage.config new file mode 100644 index 0000000..b7766bc --- /dev/null +++ b/conf/pipeline/sarek/sage.config @@ -0,0 +1,37 @@ +params { + config_profile_description = 'The Sage Bionetworks profile' + config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' + config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' +} + +// Function to ensure that resource requirements don't go beyond a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} diff --git a/docs/pipeline/sarek/sage.md b/docs/pipeline/sarek/sage.md new file mode 100644 index 0000000..646fdda --- /dev/null +++ b/docs/pipeline/sarek/sage.md @@ -0,0 +1,7 @@ +# nf-core/configs: Sage Bionetworks Sarek-Specific Configuration + +To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration. + +In addition to the global configuration described [here](../../sage.md), this Sarek-specific configuration includes the following tweaks: + +- Define the `check_max()` function, which is missing in Sarek v2. diff --git a/docs/sage.md b/docs/sage.md index 755e0c2..5b0fa49 100644 --- a/docs/sage.md +++ b/docs/sage.md @@ -1,12 +1,14 @@ -# nf-core/configs: Sage Bionetworks Configuration +# nf-core/configs: Sage Bionetworks Global Configuration -To use this custom configuration, run the pipeline with `-profile sage`. This will download and launch the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). 
These include: +To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration. -- Updating the default value for `igenomes_base` to `s3://sage-igenomes` -- Increasing the default time limits because we run pipelines on AWS -- Enabling retries by default when exit codes relate to insufficient memory +This global configuration includes the following tweaks: + +- Update the default value for `igenomes_base` to `s3://sage-igenomes` +- Increase the default time limits because we run pipelines on AWS +- Enable retries by default when exit codes relate to insufficient memory - Allow pending jobs to finish if the number of retries are exhausted -- Slowing the increase in the number of allocated CPU cores on retries +- Slow the increase in the number of allocated CPU cores on retries ## Additional information about iGenomes diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 57d7bdf..36efc99 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,4 +15,5 @@ profiles { cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } -} \ No newline at end of file + sage { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/sage.config" } +} From a19d79f6ffcf1f7c20033e9247a21e2119a0941d Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 23 Jun 2022 09:44:16 -0700 Subject: [PATCH 8/9] Move `check_max()` to global config --- conf/sage.config | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/conf/sage.config b/conf/sage.config index 3b2a423..b559f79 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -64,3 +64,37 @@ params { def slow(attempt, factor = 2) { return Math.ceil( attempt / factor) as int } + + +// Function to ensure that resource requirements don't go +// beyond a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} From 6f0d9e6c43b8ee0373011f32d4dbf3c17811cd78 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 23 Jun 2022 10:10:28 -0700 Subject: [PATCH 9/9] Complete `check_max()` move to global config --- README.md | 1 - conf/pipeline/sarek/sage.config | 37 --------------------------------- conf/sage.config | 2 +- docs/pipeline/sarek/sage.md | 7 ------- docs/sage.md | 1 + pipeline/sarek.config | 1 - 6 files changed, 2 insertions(+), 47 deletions(-) delete mode 100644 conf/pipeline/sarek/sage.config delete mode 100644 docs/pipeline/sarek/sage.md diff --git a/README.md b/README.md index 0d561fc..d86461c 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,6 @@ Currently documentation is available for the following pipelines within specific - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek - [MUNIN](docs/pipeline/sarek/munin.md) - - [SAGE BIONETWORKS](docs/pipeline/sarek/sage.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler - [EVA](docs/pipeline/taxprofiler/eva.md) diff --git a/conf/pipeline/sarek/sage.config b/conf/pipeline/sarek/sage.config deleted file mode 100644 index b7766bc..0000000 --- a/conf/pipeline/sarek/sage.config +++ /dev/null @@ -1,37 +0,0 @@ -params { - config_profile_description = 'The Sage Bionetworks profile' - config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' - config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' -} - -// Function to ensure that resource requirements don't go beyond a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} diff --git a/conf/sage.config b/conf/sage.config index b559f79..e5bfa8b 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -67,7 +67,7 @@ def slow(attempt, factor = 2) { // Function to ensure that resource requirements don't go -// beyond a maximum limit +// beyond a maximum limit (copied here for Sarek v2) def check_max(obj, type) { if (type == 'memory') { try { diff --git a/docs/pipeline/sarek/sage.md b/docs/pipeline/sarek/sage.md deleted file mode 100644 index 646fdda..0000000 --- a/docs/pipeline/sarek/sage.md +++ /dev/null @@ -1,7 +0,0 @@ -# nf-core/configs: Sage Bionetworks Sarek-Specific Configuration - -To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration. 
- -In addition to the global configuration described [here](../../sage.md), this Sarek-specific configuration includes the following tweaks: - -- Define the `check_max()` function, which is missing in Sarek v2. diff --git a/docs/sage.md b/docs/sage.md index 5b0fa49..133ccec 100644 --- a/docs/sage.md +++ b/docs/sage.md @@ -9,6 +9,7 @@ This global configuration includes the following tweaks: - Enable retries by default when exit codes relate to insufficient memory - Allow pending jobs to finish if the number of retries are exhausted - Slow the increase in the number of allocated CPU cores on retries +- Define the `check_max()` function, which is missing in Sarek v2 ## Additional information about iGenomes diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 36efc99..512541e 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,5 +15,4 @@ profiles { cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } - sage { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/sage.config" } }
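For reference, here is a minimal standalone Groovy sketch (illustrative only; it is not part of any patch above) of the `slow()` helper that these patches add to `conf/sage.config`. It shows how the helper halves the rate at which the CPU multiplier grows across retry attempts, compared with the plain `task.attempt` multiplier used for memory and time.

    // Same definition as the one introduced in conf/sage.config (PATCH 3/9)
    def slow(attempt, factor = 2) {
        return Math.ceil( attempt / factor ) as int
    }

    // Prints:
    //   attempt 1: cpus x1, memory/time x1
    //   attempt 2: cpus x1, memory/time x2
    //   attempt 3: cpus x2, memory/time x3
    //   attempt 4: cpus x2, memory/time x4
    (1..4).each { attempt ->
        println "attempt ${attempt}: cpus x${slow(attempt)}, memory/time x${attempt}"
    }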