Mirror of https://github.com/MillironX/nf-configs.git (synced 2024-11-22 08:29:54 +00:00)
Improve AWS-related config for Sage profile
parent 6f0d9e6c43
commit 317e5a16cb
2 changed files with 74 additions and 45 deletions
conf/sage.config (112 changed lines)
@@ -1,62 +1,88 @@

Updated conf/sage.config (the resource-allocation tweaks removed from the active configuration are preserved in the commented-out block near the end):

// Config profile metadata
params {
    config_profile_description = 'The Sage Bionetworks profile'
    config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'
    config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'
}

// Leverage us-east-1 mirror of select human and mouse genomes
params {
    igenomes_base = 's3://sage-igenomes/igenomes'
}

// Enable retries globally for certain exit codes
process {
    errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }
    maxRetries = 5
    maxErrors = '-1'
}

// Increase time limit to allow file transfers to finish
// The default is 12 hours, which results in timeouts
threadPool.FileTransfer.maxAwait = '24 hour'

// Configure Nextflow to be more reliable on AWS
aws {
    region = "us-east-1"
    client {
        uploadChunkSize = 209715200
    }
}
executor {
    name = 'awsbatch'
    // Ensure unlimited queue size on AWS Batch
    queueSize = 100000
    // Slow down the rate at which AWS Batch jobs accumulate in
    // the queue (an attempt to prevent orphaned EBS volumes)
    submitRateLimit = '5 / 1 sec'
}

// Disabling resource allocation tweaks for now
//
// params {
//     max_memory = 500.GB
//     max_cpus = 64
//     max_time = 168.h // One week
// }
//
// process {
//
//     cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) }
//     memory = { check_max( 6.GB * task.attempt, 'memory' ) }
//     time = { check_max( 24.h * task.attempt, 'time' ) }
//
//     // Process-specific resource requirements
//     withLabel:process_low {
//         cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) }
//         memory = { check_max( 12.GB * task.attempt, 'memory' ) }
//         time = { check_max( 24.h * task.attempt, 'time' ) }
//     }
//     withLabel:process_medium {
//         cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) }
//         memory = { check_max( 36.GB * task.attempt, 'memory' ) }
//         time = { check_max( 48.h * task.attempt, 'time' ) }
//     }
//     withLabel:process_high {
//         cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) }
//         memory = { check_max( 72.GB * task.attempt, 'memory' ) }
//         time = { check_max( 96.h * task.attempt, 'time' ) }
//     }
//     withLabel:process_long {
//         time = { check_max( 192.h * task.attempt, 'time' ) }
//     }
//     withLabel:process_high_memory {
//         memory = { check_max( 128.GB * task.attempt, 'memory' ) }
//     }
//
//     // Preventing Sarek labels from using the actual maximums
//     withLabel:memory_max {
//         memory = { check_max( 128.GB * task.attempt, 'memory' ) }
//     }
//     withLabel:cpus_max {
//         cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) }
//     }
//
// }

// Function to slow the increase of the resource multiplier
// as attempts are made. The rationale is that some CPUs
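The hunk ends just as it reaches the helper that these two comment lines introduce, so the body of `slow()` is not shown here. As a rough illustration only (the actual definition sits further down in conf/sage.config and may differ), a dampened retry multiplier could look like this:

```groovy
// Hypothetical sketch of a dampened retry multiplier: CPU requests grow
// roughly half as fast as memory/time across attempts.
// slow(1) == 1, slow(2) == 1, slow(3) == 2, slow(4) == 2, ...
def slow(attempt, factor = 2) {
    return Math.ceil(attempt / factor) as int
}
```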
Second changed file (the Sage profile documentation):

@@ -5,11 +5,14 @@ To use this custom configuration, run the pipeline with `-profile sage`. This wi

This global configuration includes the following tweaks:

- Update the default value for `igenomes_base` to `s3://sage-igenomes`
- Enable retries by default when exit codes relate to insufficient memory
- Allow pending jobs to finish if the number of retries is exhausted
- Increase the amount of time allowed for file transfers
- Increase the default chunk size for multipart uploads to S3
- Slow down job submission rate to avoid overwhelming any APIs
- Define the `check_max()` function, which is missing in Sarek v2 (sketched below)
- (Disabled temporarily) Slow the increase in the number of allocated CPU cores on retries
- (Disabled temporarily) Increase the default time limits because we run pipelines on AWS
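The `check_max()` definition itself is not part of this diff. As an illustration of the kind of helper the profile supplies, here is a sketch modeled on the standard nf-core template function; it caps each request at the corresponding `params.max_*` ceiling and assumes `params.max_memory`, `params.max_cpus` and `params.max_time` are defined:

```groovy
// Sketch (modeled on the nf-core template): cap a requested resource at the
// corresponding params.max_* ceiling, falling back to the request on error.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min(obj as int, params.max_cpus as int)
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}
```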
## Additional information about iGenomes