Merge branch 'master' into eva-java-correction

2024-11-24 09:09:56 +00:00 · 2022-07-04 13:35:50 +02:00 · 2022-07-04 13:35:50 +02:00 · a141ff626b
commit a141ff626b
parent acabcbd9e8 49e779e337
7 changed files with 137 additions and 9 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -77,6 +77,7 @@ jobs:
          - "phoenix"
          - "prince"
          - "rosalind"
+          - "sage"
          - "sahmri"
          - "sanger"
          - "seg_globe"
--- a/README.md
+++ b/README.md
@ -10,7 +10,6 @@ A repository for hosting Nextflow configuration files containing custom paramete
  - [Configuration and parameters](#configuration-and-parameters)
  - [Offline usage](#offline-usage)
 - [Adding a new config](#adding-a-new-config)
-  - [Checking user hostnames](#checking-user-hostnames)
  - [Testing](#testing)
  - [Documentation](#documentation)
  - [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs)
@ -131,6 +130,7 @@ Currently documentation is available for the following systems:
 - [PHOENIX](docs/phoenix.md)
 - [PRINCE](docs/prince.md)
 - [ROSALIND](docs/rosalind.md)
+- [SAGE BIONETWORKS](docs/sage.md)
 - [SANGER](docs/sanger.md)
 - [SEG_GLOBE](docs/seg_globe.md)
 - [UCT_HPC](docs/uct_hpc.md)
--- a/conf/sage.config
+++ b/conf/sage.config
@ -0,0 +1,100 @@
+params {  // Metadata describing this profile: description, contact person and URL
+  config_profile_description = 'The Sage Bionetworks profile'
+  config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'
+  config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'
+}
+
+process {  // Default resources, retry policy and per-label overrides
+
+  cpus   = { check_max( 1    * slow(task.attempt), 'cpus'   ) }  // CPUs scale with slow(attempt), i.e. more gently than memory and time
+  memory = { check_max( 6.GB * task.attempt,       'memory' ) }  // memory and time scale linearly with the attempt number
+  time   = { check_max( 24.h * task.attempt,       'time'   ) }  // every request is capped by check_max() against params.max_*
+
+  errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }  // retry only on the listed exit statuses; any other failure selects 'finish'
+  maxRetries    = 5
+  maxErrors     = '-1'
+
+  // Process-specific resource requirements
+  withLabel:process_low {
+    cpus   = { check_max( 4     * slow(task.attempt),  'cpus'   ) }
+    memory = { check_max( 12.GB * task.attempt,        'memory' ) }
+    time   = { check_max( 24.h  * task.attempt,        'time'   ) }
+  }
+  withLabel:process_medium {
+    cpus   = { check_max( 12    * slow(task.attempt), 'cpus'   ) }
+    memory = { check_max( 36.GB * task.attempt,       'memory' ) }
+    time   = { check_max( 48.h  * task.attempt,       'time'   ) }
+  }
+  withLabel:process_high {
+    cpus   = { check_max( 24    * slow(task.attempt), 'cpus'   ) }
+    memory = { check_max( 72.GB * task.attempt,       'memory' ) }
+    time   = { check_max( 96.h  * task.attempt,       'time'   ) }
+  }
+  withLabel:process_long {
+    time   = { check_max( 192.h  * task.attempt,   'time'   ) }  // NOTE: 192.h already exceeds the 168.h max_time set in this file, so check_max() caps it unless max_time is overridden
+  }
+  withLabel:process_high_memory {
+    memory = { check_max( 128.GB * task.attempt,   'memory' ) }
+  }
+
+  // Preventing Sarek labels from using the actual maximums
+  withLabel:memory_max {
+    memory = { check_max( 128.GB * task.attempt,   'memory' ) }  // same request as process_high_memory
+  }
+  withLabel:cpus_max {
+    cpus   = { check_max( 24    * slow(task.attempt), 'cpus'   ) }  // same request as process_high
+  }
+
+}
+
+aws {  // AWS settings for this profile
+  region = "us-east-1"
+}
+
+params {  // Reference-data location and the upper bounds read by check_max()
+  igenomes_base = 's3://sage-igenomes/igenomes'
+  max_memory    = 500.GB  // cap applied to 'memory' requests
+  max_cpus      = 64  // cap applied to 'cpus' requests
+  max_time      = 168.h  // One week
+}
+
+// Dampen the resource multiplier applied on retries: the attempt
+// number is divided by `factor` and rounded up to a whole number, so
+// CPU requests grow more slowly than memory requests across attempts.
+def slow(attempt, factor = 2) {
+  def ratio = attempt / factor
+  def rounded_up = Math.ceil(ratio)
+  return rounded_up as int
+}
+
+
+// Function to ensure that resource requirements don't go
+// beyond a maximum limit (copied here for Sarek v2)
+//
+//   obj  - the requested value (a memory amount, a duration or a CPU count)
+//   type - 'memory', 'time' or 'cpus'; selects which params.max_* limit applies
+//
+// Returns the params.max_* limit when `obj` exceeds it, otherwise `obj`.
+// If the limit cannot be converted or compared, an error line is printed
+// and `obj` is returned unchanged. For any other `type` no branch matches
+// and the result is null.
+def check_max(obj, type) {
+    if (type == 'memory') {
+        try {
+            def max_memory = params.max_memory as nextflow.util.MemoryUnit
+            // compareTo() only guarantees the sign of its result, so test > 0 rather than == 1
+            return obj.compareTo(max_memory) > 0 ? max_memory : obj
+        } catch (all) {
+            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'time') {
+        try {
+            def max_time = params.max_time as nextflow.util.Duration
+            // Same sign-based comparison as for memory
+            return obj.compareTo(max_time) > 0 ? max_time : obj
+        } catch (all) {
+            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'cpus') {
+        try {
+            return Math.min( obj, params.max_cpus as int )
+        } catch (all) {
+            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+            return obj
+        }
+    }
+}
--- a/conf/vsc_ugent.config
+++ b/conf/vsc_ugent.config
@ -7,9 +7,9 @@ workDir = "$scratch_dir/work"
 // Perform work directory cleanup when the run has successfully completed
 // cleanup = true

-// Reduce the job submit rate to about 10 per second, this way the server won't be bombarded with jobs
+// Reduce the job submit rate to about 5 per second, this way the server won't be bombarded with jobs
 executor {
-    submitRateLimit = '10 sec'
+    submitRateLimit = '5 sec'
 }

 // Specify that singularity should be used and where the cache dir will be for the images
@ -19,6 +19,10 @@ singularity {
    cacheDir = "$scratch_dir/singularity"
 }

+env {  // Set once at the top level instead of via a beforeScript export in each cluster profile
+    SINGULARITY_CACHEDIR="$scratch_dir/.singularity"
+}
+
 // Define profiles for each cluster
 profiles {
    skitty {
@ -35,7 +39,6 @@ profiles {
            executor = 'slurm'
            queue = 'skitty'
            maxRetries = 2
-            beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity"
            scratch = "$scratch_dir"
        }
    }
@ -54,7 +57,6 @@ profiles {
            executor = 'slurm'
            queue = 'swalot'
            maxRetries = 2
-            beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity"
            scratch = "$scratch_dir"
        }
    }
@ -73,7 +75,6 @@ profiles {
            executor = 'slurm'
            queue = 'victini'
            maxRetries = 2
-            beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity"
            scratch = "$scratch_dir"
        }
    }
@ -92,7 +93,6 @@ profiles {
            executor = 'slurm'
            queue = 'kirlia'
            maxRetries = 2
-            beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity"
            scratch = "$scratch_dir"
        }
    }
@ -111,7 +111,6 @@ profiles {
            executor = 'slurm'
            queue = 'doduo'
            maxRetries = 2
-            beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity"
            scratch = "$scratch_dir"
        }
    }
--- a/docs/sage.md
+++ b/docs/sage.md
@ -0,0 +1,27 @@
+# nf-core/configs: Sage Bionetworks Global Configuration
+
+To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration.
+
+This global configuration includes the following tweaks:
+
+- Update the default value for `igenomes_base` to `s3://sage-igenomes/igenomes`
+- Increase the default time limits because we run pipelines on AWS
+- Enable retries by default when exit codes relate to insufficient memory
+- Allow pending jobs to finish if the number of retries is exhausted
+- Slow the increase in the number of allocated CPU cores on retries
+- Define the `check_max()` function, which is missing in Sarek v2
+
+## Additional information about iGenomes
+
+The following iGenomes prefixes have been copied from `s3://ngi-igenomes/` (`eu-west-1`) to `s3://sage-igenomes` (`us-east-1`). See [this script](https://github.com/Sage-Bionetworks-Workflows/nextflow-infra/blob/main/bin/mirror-igenomes.sh) for more information. The `sage-igenomes` S3 bucket has been configured to be openly available, but files cannot be downloaded out of `us-east-1` to avoid egress charges. You can check the `conf/igenomes.config` file in each nf-core pipeline to figure out the mapping between genome IDs (_i.e._ for `--genome`) and iGenomes prefixes ([example](https://github.com/nf-core/rnaseq/blob/89bf536ce4faa98b4d50a8ec0a0343780bc62e0a/conf/igenomes.config#L14-L26)).
+
+- **Human Genome Builds**
+  - `Homo_sapiens/Ensembl/GRCh37`
+  - `Homo_sapiens/GATK/GRCh37`
+  - `Homo_sapiens/UCSC/hg19`
+  - `Homo_sapiens/GATK/GRCh38`
+  - `Homo_sapiens/NCBI/GRCh38`
+  - `Homo_sapiens/UCSC/hg38`
+- **Mouse Genome Builds**
+  - `Mus_musculus/Ensembl/GRCm38`
+  - `Mus_musculus/UCSC/mm10`
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@ -59,6 +59,7 @@ profiles {
  phoenix      { includeConfig "${params.custom_config_base}/conf/phoenix.config" }
  prince       { includeConfig "${params.custom_config_base}/conf/prince.config" }
  rosalind     { includeConfig "${params.custom_config_base}/conf/rosalind.config" }
+  sage         { includeConfig "${params.custom_config_base}/conf/sage.config" }
  sahmri       { includeConfig "${params.custom_config_base}/conf/sahmri.config" }
  sanger       { includeConfig "${params.custom_config_base}/conf/sanger.config"}
  seg_globe    { includeConfig "${params.custom_config_base}/conf/seg_globe.config"}