1
0
Fork 0
mirror of https://github.com/MillironX/nf-configs.git synced 2024-11-24 09:09:56 +00:00

Merge pull request #389 from Sage-Bionetworks-Workflows/bgrande/sage-config

[WIP] Add custom configuration for Sage Bionetworks
This commit is contained in:
Phil Ewels 2022-06-23 11:34:40 +02:00 committed by GitHub
commit 1730751d97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 93 additions and 0 deletions

View file

@ -77,6 +77,7 @@ jobs:
- "phoenix"
- "prince"
- "rosalind"
- "sage"
- "sahmri"
- "sanger"
- "seg_globe"

View file

@ -131,6 +131,7 @@ Currently documentation is available for the following systems:
- [PHOENIX](docs/phoenix.md)
- [PRINCE](docs/prince.md)
- [ROSALIND](docs/rosalind.md)
- [SAGE BIONETWORKS](docs/sage.md)
- [SANGER](docs/sanger.md)
- [SEG_GLOBE](docs/seg_globe.md)
- [UCT_HPC](docs/uct_hpc.md)

66
conf/sage.config Normal file
View file

@ -0,0 +1,66 @@
params {
config_profile_description = 'The Sage Bionetworks profile'
config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'
config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'
}
process {
cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 24.h * task.attempt, 'time' ) }
errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }
maxRetries = 5
maxErrors = '-1'
// Process-specific resource requirements
withLabel:process_low {
cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 24.h * task.attempt, 'time' ) }
}
withLabel:process_medium {
cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) }
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
time = { check_max( 48.h * task.attempt, 'time' ) }
}
withLabel:process_high {
cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 96.h * task.attempt, 'time' ) }
}
withLabel:process_long {
time = { check_max( 192.h * task.attempt, 'time' ) }
}
withLabel:process_high_memory {
memory = { check_max( 128.GB * task.attempt, 'memory' ) }
}
// Preventing Sarek labels from using the actual maximums
withLabel:memory_max {
memory = { check_max( 128.GB * task.attempt, 'memory' ) }
}
withLabel:cpus_max {
cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) }
}
}
aws {
region = "us-east-1"
}
params {
igenomes_base = 's3://sage-igenomes/igenomes'
max_memory = 500.GB
max_cpus = 64
max_time = 168.h // One week
}
// Function to slow the increase of the resource multipler
// as attempts are made. The rationale is that some CPUs
// don't need to be increased as fast as memory.
def slow(attempt, factor = 2) {
return Math.ceil( attempt / factor) as int
}

24
docs/sage.md Normal file
View file

@ -0,0 +1,24 @@
# nf-core/configs: Sage Bionetworks Configuration
To use this custom configuration, run the pipeline with `-profile sage`. This will download and launch the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). These include:
- Updating the default value for `igenomes_base` to `s3://sage-igenomes`
- Increasing the default time limits because we run pipelines on AWS
- Enabling retries by default when exit codes relate to insufficient memory
- Allow pending jobs to finish if the number of retries are exhausted
- Slowing the increase in the number of allocated CPU cores on retries
## Additional information about iGenomes
The following iGenomes prefixes have been copied from `s3://ngi-igenomes/` (`eu-west-1`) to `s3://sage-igenomes` (`us-east-1`). See [this script](https://github.com/Sage-Bionetworks-Workflows/nextflow-infra/blob/main/bin/mirror-igenomes.sh) for more information. The `sage-igenomes` S3 bucket has been configured to openly available, but files cannot be downloaded out of `us-east-1` to avoid egress charges. You can check the `conf/igenomes.config` file in each nf-core pipeline to figure out the mapping between genome IDs (_i.e._ for `--genome`) and iGenomes prefixes ([example](https://github.com/nf-core/rnaseq/blob/89bf536ce4faa98b4d50a8ec0a0343780bc62e0a/conf/igenomes.config#L14-L26)).
- **Human Genome Builds**
- `Homo_sapiens/Ensembl/GRCh37`
- `Homo_sapiens/GATK/GRCh37`
- `Homo_sapiens/UCSC/hg19`
- `Homo_sapiens/GATK/GRCh38`
- `Homo_sapiens/NCBI/GRCh38`
- `Homo_sapiens/UCSC/hg38`
- **Mouse Genome Builds**
- `Mus_musculus/Ensembl/GRCm38`
- `Mus_musculus/UCSC/mm10`

View file

@ -59,6 +59,7 @@ profiles {
phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" }
prince { includeConfig "${params.custom_config_base}/conf/prince.config" }
rosalind { includeConfig "${params.custom_config_base}/conf/rosalind.config" }
sage { includeConfig "${params.custom_config_base}/conf/sage.config" }
sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" }
sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"}
seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"}