diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a5c1ca4..382a0f7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,6 +49,7 @@ jobs:
           - 'imperial_mb'
           - 'jax'
           - 'lugh'
+          - 'maestro'
           - 'mpcdf'
           - 'munin'
           - 'nu_genomics'
diff --git a/README.md b/README.md
index 3fbadc1..6c0d197 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,7 @@ Currently documentation is available for the following systems:
 * [ICR_DAVROS](docs/icr_davros.md)
 * [JAX](docs/jax.md)
 * [LUGH](docs/lugh.md)
+* [MAESTRO](docs/maestro.md)
 * [MPCDF](docs/mpcdf.md)
 * [MUNIN](docs/munin.md)
 * [NU_GENOMICS](docs/nu_genomics.md)
diff --git a/conf/maestro.config b/conf/maestro.config
new file mode 100644
index 0000000..8815932
--- /dev/null
+++ b/conf/maestro.config
@@ -0,0 +1,49 @@
+params {
+    config_profile_description = 'Institut Pasteur Maestro cluster profile'
+    config_profile_url = 'https://research.pasteur.fr/en/equipment/maestro-compute-cluster/'
+    config_profile_contact = 'Pierre Luisi (@pierrespc)'
+}
+
+singularity {
+    enabled = true
+    autoMounts = true
+    runOptions = '--home $HOME:/home/$USER --bind /pasteur'
+}
+
+profiles {
+
+    normal {
+        process {
+            executor = 'slurm'
+            scratch = false
+            queue = 'common'
+            clusterOptions = '--qos=normal'
+        }
+
+        params {
+            igenomes_ignore = true
+            igenomesIgnore = true
+            max_memory = 400.GB
+            max_cpus = 96
+            max_time = 24.h
+        }
+    }
+
+    long {
+        process {
+            executor = 'slurm'
+            scratch = false
+            queue = 'common'
+            clusterOptions = '--qos=long'
+        }
+
+        params {
+            igenomes_ignore = true
+            igenomesIgnore = true
+            max_memory = 400.GB
+            max_cpus = 5
+            max_time = 8760.h
+        }
+    }
+
+}
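+
+// Usage sketch (illustrative only; '<pipeline>' and '<qos>' are placeholders,
+// not values defined in this file):
+//   nextflow run nf-core/<pipeline> -profile maestro,<qos>
+//   e.g. nextflow run nf-core/rnaseq -profile maestro,normal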
diff --git a/conf/pipeline/eager/maestro.config b/conf/pipeline/eager/maestro.config
new file mode 100644
index 0000000..4a6a185
--- /dev/null
+++ b/conf/pipeline/eager/maestro.config
@@ -0,0 +1,116 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running nf-core/eager on whole-genome data or mitogenomes
+ * -------------------------------------------------
+ *  nextflow run nf-core/eager -profile maestro,<qos>,<type>
+ *  (where <qos> is long or normal, and <type> is nuclear, mitochondrial or unlimitedtime)
+ */

+params {
+
+    config_profile_name = 'nf-core/eager nuclear/mitochondrial - human profiles'
+
+    config_profile_description = "Simple profiles setting the computational resources needed to process human nuclear DNA or human mitogenomes. An unlimitedtime profile is also available."
+
+}
+
+
+profiles {
+
+    nuclear {
+        process {
+            errorStrategy = 'retry'
+            maxRetries = 2
+
+            withName:'makeBWAIndex'{
+                cpus = { check_max( 8 * task.attempt, 'cpus' ) }
+                memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+                time = { check_max( 12.h * task.attempt, 'time' ) }
+            }
+            withName:'adapter_removal'{
+                cpus = { check_max( 8 * task.attempt, 'cpus' ) }
+                memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+                time = { check_max( 12.h * task.attempt, 'time' ) }
+            }
+            withName:'bwa'{
+                cpus = { check_max( 40 * task.attempt, 'cpus' ) }
+                memory = { check_max( 40.GB * task.attempt, 'memory' ) }
+                time = 24.h
+                cache = 'deep'
+            }
+            withName:'markduplicates'{
+                errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+                cpus = { check_max( 16 * task.attempt, 'cpus' ) }
+                memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+                time = { check_max( 12.h * task.attempt, 'time' ) }
+            }
+            withName:'damageprofiler'{
+                cpus = 1
+                memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+                time = { check_max( 6.h * task.attempt, 'time' ) }
+            }
+            withName:'fastp'{
+                cpus = 8
+                memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+                time = { check_max( 6.h * task.attempt, 'time' ) }
+            }
+            withName:'fastqc'{
+                cpus = 2
+                memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+                time = { check_max( 6.h * task.attempt, 'time' ) }
+            }
+        }
+    }
+
+    mitochondrial {
+        process {
+            errorStrategy = 'retry'
+            maxRetries = 2
+
+            withName:'makeBWAIndex'{
+                cpus = { check_max( 8 * task.attempt, 'cpus' ) }
+                memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+                time = { check_max( 12.h * task.attempt, 'time' ) }
+            }
+            withName:'adapter_removal'{
+                cpus = { check_max( 8 * task.attempt, 'cpus' ) }
+                memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+                time = { check_max( 12.h * task.attempt, 'time' ) }
+            }
+            withName:'bwa'{
+                cpus = { check_max( 5 * task.attempt, 'cpus' ) }
+                memory = { check_max( 5.GB * task.attempt, 'memory' ) }
+                time = 24.h
+            }
+            withName:'markduplicates'{
+                errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+                cpus = { check_max( 5 * task.attempt, 'cpus' ) }
+                memory = { check_max( 5.GB * task.attempt, 'memory' ) }
+                time = { check_max( 6.h * task.attempt, 'time' ) }
+            }
+            withName:'damageprofiler'{
+                cpus = 1
+                memory = { check_max( 5.GB * task.attempt, 'memory' ) }
+                time = { check_max( 3.h * task.attempt, 'time' ) }
+            }
+            withName:'fastp'{
+                cpus = 8
+                memory = { check_max( 5.GB * task.attempt, 'memory' ) }
+                time = { check_max( 3.h * task.attempt, 'time' ) }
+            }
+            withName:'fastqc'{
+                cpus = 2
+                memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+                time = { check_max( 6.h * task.attempt, 'time' ) }
+            }
+        }
+    }
+
+    unlimitedtime {
+        process {
+            errorStrategy = 'finish'
+
+            cpus = 5
+            memory = 200.GB
+            time = 8760.h
+        }
+    }
+}
diff --git a/conf/uppmax.config b/conf/uppmax.config
index df35835..d8b83ad 100644
--- a/conf/uppmax.config
+++ b/conf/uppmax.config
@@ -13,9 +13,43 @@ singularity {
     envWhitelist = 'SNIC_TMP'
 }
 
+def hostname = "r1"
+try {
+    hostname = "sinfo --local -N -h | grep -F -v CLUSTER: | head -1 | cut -f1 -d' ' ".execute().text.trim()
+} catch (java.io.IOException e) {
+    System.err.println("WARNING: Could not run sinfo to determine current cluster, defaulting to rackham")
+}
+
+// Closure to create suitable clusterOptions from the task memory request
+def clusterOptionsCreator = { m ->
+    String base = "-A $params.project ${params.clusterOptions ?: ''}"
+    // Do not use -p node on irma, or if a thin node/core is enough
+    if (m < 125.GB || hostname ==~ "i.*") {
+        return base
+    }
+
+    if (m < 250.GB) {
+        return base + " -p node -C mem256GB "
+    }
+
+    // Use mem1TB for the remaining cases on rackham (it has no 512 GB nodes)
+    if (hostname ==~ "r.*") {
+        return base + " -p node -C mem1TB "
+    }
+
+    if (m > 500.GB) {
+        // Special case for the snowy very fat node (the only remaining case above 500 GB)
+        return base + " -p veryfat "
+    }
+
+    // Only mem512GB cases remain (snowy and bianca)
+    return base + " -p node -C mem512GB "
+}
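+
+// Illustrative sketch of what the closure yields (derived by hand from the
+// logic above with hostname "r1"; '<project>' is a placeholder, not tested output):
+//   clusterOptionsCreator(8.GB)   -> "-A <project> "
+//   clusterOptionsCreator(200.GB) -> "-A <project>  -p node -C mem256GB "
+//   clusterOptionsCreator(900.GB) -> "-A <project>  -p node -C mem1TB "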
 
 process {
     executor = 'slurm'
-    clusterOptions = { "-A $params.project ${params.clusterOptions ?: ''}" }
+    clusterOptions = { clusterOptionsCreator(task.memory) }
 
     // Use node local storage for execution.
     scratch = '$SNIC_TMP'
 }
 
@@ -23,17 +57,16 @@ process {
 
 params {
     save_reference = true
-    max_memory = 125.GB
+    max_memory = 500.GB
     max_cpus = 16
     max_time = 240.h
     // illumina iGenomes reference file paths on UPPMAX
-    igenomes_base = '/sw/data/uppnex/igenomes/'
+    igenomes_base = '/sw/data/igenomes/'
 }
 
-def hostname = "hostname".execute().text.trim()
-
-if (hostname ==~ "b.*" || hostname ==~ "s.*") {
-    params.max_memory = 109.GB
+if (hostname ==~ "s[0-9][0-9]*") {
+    params.max_time = 700.h
+    params.max_memory = 3880.GB
 }
 
 if (hostname ==~ "i.*") {
@@ -42,6 +75,7 @@ if (hostname ==~ "i.*") {
 
 if (hostname ==~ "r.*") {
     params.max_cpus = 20
+    params.max_memory = 970.GB
 }
 
 profiles {
diff --git a/docs/maestro.md b/docs/maestro.md
new file mode 100644
index 0000000..718342b
--- /dev/null
+++ b/docs/maestro.md
@@ -0,0 +1,19 @@
+# nf-core/configs: Maestro (Institut Pasteur, Paris) Configuration
+
+To use, run the pipeline with `-profile maestro,<qos>` (where `<qos>` is `long` or `normal`). This will download and launch [`maestro.config`](../conf/maestro.config), which has been pre-configured with a setup suitable for the Maestro cluster on either the long or the normal qos.
+Using one of these profiles, a Docker image containing all of the required software will be downloaded and converted to a Singularity image before execution of the pipeline.
+
+## Required modules
+
+Please load the Java, Nextflow and Singularity modules first:
+
+`module load java`
+`module load nextflow`
+`module load singularity`
+
+Also, do not forget to run Nextflow inside `tmux` or a similar terminal multiplexer.
+
+## Other profiles at Pasteur
+
+If you are using the TARS cluster, please refer to the pasteur profile instead.
+
+See [docs/pasteur.md](pasteur.md) for instructions on installing and running nf-core pipelines at Pasteur.
diff --git a/docs/pipeline/eager/maestro.md b/docs/pipeline/eager/maestro.md
new file mode 100644
index 0000000..8853eef
--- /dev/null
+++ b/docs/pipeline/eager/maestro.md
@@ -0,0 +1,30 @@
+# nf-core/configs: Maestro eager-specific configuration
+
+Extra configuration for the eager pipeline for processing human DNA data.
+
+## Usage
+
+To use, run the pipeline with `-profile maestro,<qos>,<type>`, where `<qos>` can be `normal` or `long` and `<type>` can be `nuclear` or `mitochondrial`.
+
+This will download and launch the eager-specific [`maestro.config`](../../../conf/pipeline/eager/maestro.config), which has been pre-configured with a setup suitable for the Maestro cluster.
+
+Example: `nextflow run nf-core/eager -profile maestro,normal,nuclear`
+
+## eager-specific configurations for Maestro
+
+Specific configurations have been made for eager on Maestro.
+
+We decided not to set any tool parameters here and to focus the profile purely on resource management: the Maestro profiles run with default nf-core/eager parameters, but with resource requests adjusted to the time limits (24 h on the normal qos), which means increasing memory and CPUs, especially for alignment.
+
+## nuclear
+
+Increases the number of CPUs and the amount of memory for key processes.
+
+## mitochondrial
+
+Requests more limited computational resources.
+
+## unlimitedtime
+
+Every process has a one-year time limit. Use this only when some processes cannot complete within the time limits of the mitochondrial or nuclear profiles.
+Expect slow runs with this profile, because only 5 CPUs are available at a time.
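+
+A minimal usage sketch (assuming the `long` qos, since its 8760 h ceiling in [`maestro.config`](../../../conf/maestro.config) is what such long runs require):
+
+`nextflow run nf-core/eager -profile maestro,long,unlimitedtime`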
diff --git a/nfcore_custom.config b/nfcore_custom.config
index 1c6995c..eb5b509 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -43,6 +43,7 @@ profiles {
     imperial_mb { includeConfig "${params.custom_config_base}/conf/imperial_mb.config" }
     jax { includeConfig "${params.custom_config_base}/conf/jax.config" }
     lugh { includeConfig "${params.custom_config_base}/conf/lugh.config" }
+    maestro { includeConfig "${params.custom_config_base}/conf/maestro.config" }
     mpcdf { includeConfig "${params.custom_config_base}/conf/mpcdf.config" }
     munin { includeConfig "${params.custom_config_base}/conf/munin.config" }
     nu_genomics { includeConfig "${params.custom_config_base}/conf/nu_genomics.config" }
diff --git a/pipeline/eager.config b/pipeline/eager.config
index 3e47276..10dd990 100644
--- a/pipeline/eager.config
+++ b/pipeline/eager.config
@@ -11,4 +11,5 @@ profiles {
     mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/eager/mpcdf.config" }
     eva { includeConfig "${params.custom_config_base}/conf/pipeline/eager/eva.config" }
+    maestro { includeConfig "${params.custom_config_base}/conf/pipeline/eager/maestro.config" }
 }
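
For context, pipelines pick up these new entries through the standard nf-core template mechanism. The sketch below is a simplified rendition of what each pipeline's `nextflow.config` does (based on the nf-core template; exact wording and error handling vary between pipeline versions):

```groovy
// Simplified sketch of nf-core custom-config loading;
// not copied verbatim from any single pipeline.
params.custom_config_version = 'master'
params.custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"

// Load institutional profiles, including the 'maestro' profile added above
try {
    includeConfig "${params.custom_config_base}/nfcore_custom.config"
} catch (Exception e) {
    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}
```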