// nf-configs/conf/sage.config

// Config profile metadata: the description, contact and URL that
// identify this profile and who maintains it.
params {
  config_profile_description = 'The Sage Bionetworks profile'
  config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'
  config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'
}

// Reference genome location and resource ceilings for this profile
params {
  // Leverage us-east-1 mirror of select human and mouse genomes
  igenomes_base = 's3://sage-igenomes/igenomes'
  // Upper limits read by `check_max()` (defined at the bottom of this
  // file) when capping memory, CPU and time requests
  max_memory    = '128.GB'
  max_cpus      = 16
  max_time      = '240.h'
}

// Enable retries globally for certain exit codes
process {
  // Retry a task only when its exit status is one of the listed codes;
  // any other exit status selects the 'finish' error strategy.
  // NOTE(review): several of these look like signal-style exit codes
  // (128 + signal number, e.g. 137 and 143) — confirm the intended
  // meaning of each code, in particular 104 and 247.
  errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }
  // Allow up to 5 retries
  maxRetries    = 5
  // NOTE(review): '-1' presumably disables the cap on the total number
  // of errors — confirm against the Nextflow `maxErrors` documentation
  maxErrors     = '-1'
}

// Increase the time limit for file transfers to finish from the
// default of 12 hours, which results in timeouts, to 24 hours.
threadPool.FileTransfer.maxAwait = '24 hour'

// Configure Nextflow to be more reliable on AWS
aws {
  region = "us-east-1"
  // Upload settings
  client {
    // 209715200 = 200 * 1024 * 1024, i.e. 200 MiB
    // NOTE(review): assumes the value is interpreted in bytes — confirm
    uploadChunkSize = 209715200
    uploadMaxThreads = 4
  }
  // File-transfer settings for AWS Batch jobs: one transfer at a time,
  // at most 5 attempts per transfer, 120 seconds between attempts
  batch {
    maxParallelTransfers = 1
    maxTransferAttempts = 5
    delayBetweenAttempts = '120 sec'
  }
}
executor {
    name = 'awsbatch'
    // Limit the queue size on AWS Batch to 500
    queueSize = 500
    // Slow down the rate at which AWS Batch jobs accumulate in
    // the queue (an attempt to prevent orphaned EBS volumes):
    // at most 5 submissions per second
    submitRateLimit = '5 / 1 sec'
}

// Adjust default resource allocations (see `../docs/sage.md`)
//
// Every request is a closure evaluated per task attempt:
//   - memory and time are multiplied by `task.attempt`
//   - cpus are multiplied by `slow(task.attempt)`, which grows more slowly
//   - the result is passed through `check_max()`, which caps it at
//     `params.max_memory`, `params.max_cpus` or `params.max_time`
process {

  // Defaults applied when no label-specific setting below overrides them
  cpus   = { check_max( 1    * slow(task.attempt), 'cpus'   ) }
  memory = { check_max( 6.GB * task.attempt,       'memory' ) }
  time   = { check_max( 24.h * task.attempt,       'time'   ) }

  // Process-specific resource requirements
  withLabel:process_low {
    cpus   = { check_max( 4     * slow(task.attempt),  'cpus'   ) }
    memory = { check_max( 12.GB * task.attempt,        'memory' ) }
    time   = { check_max( 24.h  * task.attempt,        'time'   ) }
  }
  withLabel:process_medium {
    cpus   = { check_max( 12    * slow(task.attempt), 'cpus'   ) }
    memory = { check_max( 36.GB * task.attempt,       'memory' ) }
    time   = { check_max( 48.h  * task.attempt,       'time'   ) }
  }
  withLabel:process_high {
    // NOTE: 24 exceeds the `max_cpus = 16` set in this file, so with that
    // value the effective CPU request is capped at 16 on every attempt
    cpus   = { check_max( 24    * slow(task.attempt), 'cpus'   ) }
    memory = { check_max( 72.GB * task.attempt,       'memory' ) }
    time   = { check_max( 96.h  * task.attempt,       'time'   ) }
  }
  withLabel:process_long {
    // Overrides only the time request
    time   = { check_max( 192.h  * task.attempt,   'time'   ) }
  }
  withLabel:process_high_memory {
    // Overrides only the memory request. NOTE: 128.GB already equals the
    // `max_memory` set in this file, so retries cannot raise it further
    memory = { check_max( 128.GB * task.attempt,   'memory' ) }
  }

}

// Function to slow the increase of the resource multiplier
// as attempts are made. The rationale is that the number
// of CPU cores isn't a limiting factor as often as memory.
def slow(attempt, factor = 2) {
    // Divide the attempt number by `factor` and round up, so the
    // multiplier only grows once every `factor` attempts
    // (with the default factor of 2: 1, 1, 2, 2, 3, ...).
    def scaled = attempt / factor
    return Math.ceil(scaled) as int
}


// Function to ensure that resource requirements don't go
// beyond a maximum limit (copied here for Sarek v2)
def check_max(obj, type) {
    // Caps `obj` at the matching limit from `params`:
    //   type == 'memory' -> params.max_memory (as nextflow.util.MemoryUnit)
    //   type == 'time'   -> params.max_time   (as nextflow.util.Duration)
    //   type == 'cpus'   -> params.max_cpus   (as int)
    // Returns the limit when `obj` exceeds it, otherwise `obj`.
    // If the limit cannot be converted or compared, an error line is
    // printed and `obj` is returned unchanged.
    if (type == 'memory') {
        try {
            def limit = params.max_memory as nextflow.util.MemoryUnit
            // compareTo() is only guaranteed to return a positive number
            // for "greater than", so test `> 0` rather than `== 1`
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            def limit = params.max_time as nextflow.util.Duration
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unrecognized type: pass the request through unchanged instead of
    // implicitly returning null
    return obj
}
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`// Config profile metadata`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`params {`
			`config_profile_description = 'The Sage Bionetworks profile'`
			`config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'`
			`config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'`
			`}`

Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`// Leverage us-east-1 mirror of select human and mouse genomes`
			`params {`
			`igenomes_base = 's3://sage-igenomes/igenomes'`
Incorporate resource limits 2022-08-31 16:18:27 +00:00			`max_memory = '128.GB'`
			`max_cpus = 16`
			`max_time = '240.h'`
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`}`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`// Enable retries globally for certain exit codes`
			`process {`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }`
			`maxRetries = 5`
			`maxErrors = '-1'`
			`}`

Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`// Increase time limit to allow file transfers to finish`
			`// The default is 12 hours, which results in timeouts`
			`threadPool.FileTransfer.maxAwait = '24 hour'`

			`// Configure Nextflow to be more reliable on AWS`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`aws {`
			`region = "us-east-1"`
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`client {`
			`uploadChunkSize = 209715200`
Improve reliability of file transfers 2022-09-17 17:14:04 +00:00			`uploadMaxThreads = 4`
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`}`
Limit the number of parallel transfers 2022-09-13 20:15:30 +00:00			`batch {`
			`maxParallelTransfers = 1`
Improve reliability of file transfers 2022-09-17 17:14:04 +00:00			`maxTransferAttempts = 5`
			`delayBetweenAttempts = '120 sec'`
Limit the number of parallel transfers 2022-09-13 20:15:30 +00:00			`}`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`}`
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`executor {`
			`name = 'awsbatch'`
			`// Ensure unlimited queue size on AWS Batch`
Decrease AWS Batch queue size 2022-09-12 18:40:43 +00:00			`queueSize = 500`
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00			`// Slow down the rate at which AWS Batch jobs accumulate in`
			`// the queue (an attempt to prevent orphaned EBS volumes)`
			`submitRateLimit = '5 / 1 sec'`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`}`

Simplify resource adjustments 2022-08-31 16:10:30 +00:00			// Adjust default resource allocations (see `../docs/sage.md`)
			`process {`

			`cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) }`
			`memory = { check_max( 6.GB * task.attempt, 'memory' ) }`
			`time = { check_max( 24.h * task.attempt, 'time' ) }`

			`// Process-specific resource requirements`
			`withLabel:process_low {`
			`cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) }`
			`memory = { check_max( 12.GB * task.attempt, 'memory' ) }`
			`time = { check_max( 24.h * task.attempt, 'time' ) }`
			`}`
			`withLabel:process_medium {`
			`cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) }`
			`memory = { check_max( 36.GB * task.attempt, 'memory' ) }`
			`time = { check_max( 48.h * task.attempt, 'time' ) }`
			`}`
			`withLabel:process_high {`
			`cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) }`
			`memory = { check_max( 72.GB * task.attempt, 'memory' ) }`
			`time = { check_max( 96.h * task.attempt, 'time' ) }`
			`}`
			`withLabel:process_long {`
			`time = { check_max( 192.h * task.attempt, 'time' ) }`
			`}`
			`withLabel:process_high_memory {`
			`memory = { check_max( 128.GB * task.attempt, 'memory' ) }`
			`}`

			`}`
Improve AWS-related config for Sage profile 2022-08-25 23:24:52 +00:00
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`// Function to slow the increase of the resource multipler`
Simplify resource adjustments 2022-08-31 16:10:30 +00:00			`// as attempts are made. The rationale is that the number`
			`// of CPU cores isn't a limiting factor as often as memory.`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`def slow(attempt, factor = 2) {`
Simplify resource adjustments 2022-08-31 16:10:30 +00:00			`return Math.ceil( attempt / factor) as int`
Add Sage Bionetworks nf-core configuration 2022-06-08 21:16:52 +00:00			`}`
Move `check_max()` to global config 2022-06-23 16:44:16 +00:00

			`// Function to ensure that resource requirements don't go`
Complete `check_max()` move to global config 2022-06-23 17:10:28 +00:00			`// beyond a maximum limit (copied here for Sarek v2)`
Move `check_max()` to global config 2022-06-23 16:44:16 +00:00			`def check_max(obj, type) {`
			`if (type == 'memory') {`
			`try {`
			`if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)`
			`return params.max_memory as nextflow.util.MemoryUnit`
			`else`
			`return obj`
			`} catch (all) {`
			`println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"`
			`return obj`
			`}`
			`} else if (type == 'time') {`
			`try {`
			`if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)`
			`return params.max_time as nextflow.util.Duration`
			`else`
			`return obj`
			`} catch (all) {`
			`println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"`
			`return obj`
			`}`
			`} else if (type == 'cpus') {`
			`try {`
			`return Math.min( obj, params.max_cpus as int )`
			`} catch (all) {`
			`println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"`
			`return obj`
			`}`
			`}`
			`}`