From 2898736cbe223c19d0cc09c1e779783cd6ccc942 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 8 Jun 2022 14:16:52 -0700 Subject: [PATCH 1/4] Add Sage Bionetworks nf-core configuration --- .github/workflows/main.yml | 1 + README.md | 1 + conf/sage.config | 58 ++++++++++++++++++++++++++++++++++++++ docs/sage.md | 24 ++++++++++++++++ nfcore_custom.config | 1 + 5 files changed, 85 insertions(+) create mode 100644 conf/sage.config create mode 100644 docs/sage.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac31dfe..c142eb3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,6 +77,7 @@ jobs: - "phoenix" - "prince" - "rosalind" + - "sage" - "sahmri" - "sanger" - "seg_globe" diff --git a/README.md b/README.md index 1f37d29..978e606 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ Currently documentation is available for the following systems: - [PHOENIX](docs/phoenix.md) - [PRINCE](docs/prince.md) - [ROSALIND](docs/rosalind.md) +- [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) - [SEG_GLOBE](docs/seg_globe.md) - [UCT_HPC](docs/uct_hpc.md) diff --git a/conf/sage.config b/conf/sage.config new file mode 100644 index 0000000..5581d85 --- /dev/null +++ b/conf/sage.config @@ -0,0 +1,58 @@ +params { + config_profile_description = 'The Sage Bionetworks profile' + config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' + config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' +} + +process { + + cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 
'retry' : 'finish' } + maxRetries = 5 + maxErrors = '-1' + + // Process-specific resource requirements + withLabel:process_low { + cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 96.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 192.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + +} + +aws { + region = "us-east-1" +} + +params { + igenomes_base = 's3://sage-igenomes/igenomes' + max_memory = 512.GB + max_cpus = 64 + max_time = 168.h // One week +} + +// Function to slow the increase of the resource multiplier +// as attempts are made. The rationale is that some CPUs +// don't need to be increased as fast as memory. +def slow(attempt, factor = 2) { + return Math.ceil( attempt / factor) as int +} diff --git a/docs/sage.md b/docs/sage.md new file mode 100644 index 0000000..755e0c2 --- /dev/null +++ b/docs/sage.md @@ -0,0 +1,24 @@ +# nf-core/configs: Sage Bionetworks Configuration + +To use this custom configuration, run the pipeline with `-profile sage`. This will download and launch the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). 
These include: + +- Updating the default value for `igenomes_base` to `s3://sage-igenomes/igenomes` +- Increasing the default time limits because we run pipelines on AWS +- Enabling retries by default when exit codes relate to insufficient memory +- Allowing pending jobs to finish if the number of retries is exhausted +- Slowing the increase in the number of allocated CPU cores on retries + +## Additional information about iGenomes + +The following iGenomes prefixes have been copied from `s3://ngi-igenomes/` (`eu-west-1`) to `s3://sage-igenomes` (`us-east-1`). See [this script](https://github.com/Sage-Bionetworks-Workflows/nextflow-infra/blob/main/bin/mirror-igenomes.sh) for more information. The `sage-igenomes` S3 bucket has been configured to be openly available, but files cannot be downloaded out of `us-east-1` to avoid egress charges. You can check the `conf/igenomes.config` file in each nf-core pipeline to figure out the mapping between genome IDs (_i.e._ for `--genome`) and iGenomes prefixes ([example](https://github.com/nf-core/rnaseq/blob/89bf536ce4faa98b4d50a8ec0a0343780bc62e0a/conf/igenomes.config#L14-L26)). 
+ +- **Human Genome Builds** + - `Homo_sapiens/Ensembl/GRCh37` + - `Homo_sapiens/GATK/GRCh37` + - `Homo_sapiens/UCSC/hg19` + - `Homo_sapiens/GATK/GRCh38` + - `Homo_sapiens/NCBI/GRCh38` + - `Homo_sapiens/UCSC/hg38` +- **Mouse Genome Builds** + - `Mus_musculus/Ensembl/GRCm38` + - `Mus_musculus/UCSC/mm10` diff --git a/nfcore_custom.config b/nfcore_custom.config index 88e7227..4b44d8a 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -59,6 +59,7 @@ profiles { phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" } prince { includeConfig "${params.custom_config_base}/conf/prince.config" } rosalind { includeConfig "${params.custom_config_base}/conf/rosalind.config" } + sage { includeConfig "${params.custom_config_base}/conf/sage.config" } sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" } sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"} From c7093dee1306bd24bbdc6200097fcc4fccfda2d0 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 16 Jun 2022 12:37:32 -0700 Subject: [PATCH 2/4] Reduce max_memory to fit on an instance --- conf/sage.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/sage.config b/conf/sage.config index 5581d85..da1db72 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -45,7 +45,7 @@ aws { params { igenomes_base = 's3://sage-igenomes/igenomes' - max_memory = 512.GB + max_memory = 500.GB max_cpus = 64 max_time = 168.h // One week } From ecd898584dc30f42612b44634e618271550a7cfb Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 22 Jun 2022 13:54:13 -0700 Subject: [PATCH 3/4] Handle Sarek-specific `max` labels --- conf/sage.config | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/conf/sage.config b/conf/sage.config index da1db72..d2e9d21 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -34,7 +34,15 @@ process { time = { 
check_max( 192.h * task.attempt, 'time' ) } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + } + + // Preventing Sarek labels from using the actual maximums + withLabel:max_memory { + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + } + withLabel:cpus_max { + cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } } } From 37121faf4a6da6e3130defdf7eb00a0dd113010a Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 22 Jun 2022 14:37:43 -0700 Subject: [PATCH 4/4] Fix typo with label name --- conf/sage.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/sage.config b/conf/sage.config index d2e9d21..3b2a423 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -38,7 +38,7 @@ process { } // Preventing Sarek labels from using the actual maximums - withLabel:max_memory { + withLabel:memory_max { memory = { check_max( 128.GB * task.attempt, 'memory' ) } } withLabel:cpus_max {