From 0bfedde296033992860b9b4c020d490e738434e6 Mon Sep 17 00:00:00 2001 From: SPearce Date: Fri, 12 Aug 2022 15:43:22 +0100 Subject: [PATCH 01/12] Initial attempt at a CRUKMI config file --- .github/workflows/main.yml | 1 + README.md | 1 + conf/crukmi.config | 48 ++++++++++++++++++++++++++++++++++++++ docs/crukmi.md | 9 +++++++ nfcore_custom.config | 1 + 5 files changed, 60 insertions(+) create mode 100644 conf/crukmi.config create mode 100644 docs/crukmi.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f5ba0d1..59d038b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -49,6 +49,7 @@ jobs: - "cheaha" - "computerome" - "crick" + - "crukmi" - "denbi_qbic" - "ebc" - "eddie" diff --git a/README.md b/README.md index 94ce55c..918ab0c 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ Currently documentation is available for the following systems: - [CHEAHA](docs/cheaha.md) - [Computerome](docs/computerome.md) - [CRICK](docs/crick.md) +- [Cancer Research UK Manchester Institute](docs/crukmi.md) - [CZBIOHUB_AWS](docs/czbiohub.md) - [DENBI_QBIC](docs/denbi_qbic.md) - [EBC](docs/ebc.md) diff --git a/conf/crukmi.config b/conf/crukmi.config new file mode 100644 index 0000000..3e7383b --- /dev/null +++ b/conf/crukmi.config @@ -0,0 +1,48 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Cancer Research UK Manchester Institute HPC cluster profile provided by nf-core/configs' + config_profile_contact = 'Stephen Kitcatt, Simon Pearce (@skitcattCRUKMI, @sppearce)' + config_profile_url = 'http://scicom.picr.man.ac.uk/projects/user-support/wiki' +} + +env { + SINGULARITY_CACHEDIR = '/lmod/nextflow_software' +} + +singularity { + enabled = true + autoMounts = true +} + +process { + beforeScript = 'module load apps/singularity/3.8.0' + executor = 'pbs' + + withLabel:process_low { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 5.GB * task.attempt, 'memory' ) } 
+ } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 20.GB * task.attempt, 'memory' ) } + } + + withLabel:process_high { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + } + + withName: 'SAMTOOLS_MPILEUP' { + cpus = 1 + memory = { 8.GB * task.attempt } + } + + +} + +params { + max_memory = 2000.GB + max_cpus = 32 + max_time = 72.h +} diff --git a/docs/crukmi.md b/docs/crukmi.md new file mode 100644 index 0000000..a55a9fa --- /dev/null +++ b/docs/crukmi.md @@ -0,0 +1,9 @@ +# nf-core/configs: BI Configuration + +All nf-core pipelines have been successfully configured for use at Boehringer Ingelheim. + +To use, run the pipeline with `-profile bi`. This will download and launch the [`bi.config`](../conf/bi.config) which has been pre-configured with a setup suitable for the BI systems. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +Before running the pipeline you will need to follow the internal documentation to run Nextflow on our systems. Similar to that, you need to set an environment variable `NXF_GLOBAL_CONFIG` to the path of the internal global config which is not publicly available here. + +> NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. 
diff --git a/nfcore_custom.config b/nfcore_custom.config index 6f0ac6c..42b7f66 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -30,6 +30,7 @@ profiles { cheaha { includeConfig "${params.custom_config_base}/conf/cheaha.config" } computerome { includeConfig "${params.custom_config_base}/conf/computerome.config" } crick { includeConfig "${params.custom_config_base}/conf/crick.config" } + crukmi { includeConfig "${params.custom_config_base}/conf/crukmi.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } denbi_qbic { includeConfig "${params.custom_config_base}/conf/denbi_qbic.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } From a97b887d9362f6b63634ea8f3c3e4b5fb9c38eaf Mon Sep 17 00:00:00 2001 From: SPearce Date: Fri, 12 Aug 2022 16:57:04 +0100 Subject: [PATCH 02/12] Written a help file --- docs/crukmi.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/crukmi.md b/docs/crukmi.md index a55a9fa..609a840 100644 --- a/docs/crukmi.md +++ b/docs/crukmi.md @@ -1,9 +1,15 @@ -# nf-core/configs: BI Configuration +# nf-core/configs: Cancer Research UK Manchester Institute Configuration -All nf-core pipelines have been successfully configured for use at Boehringer Ingelheim. +All nf-core pipelines have been successfully configured for the use on the HPC (phoenix) at Cancer Research UK Manchester Institute. -To use, run the pipeline with `-profile bi`. This will download and launch the [`bi.config`](../conf/bi.config) which has been pre-configured with a setup suitable for the BI systems. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. +To use, run the pipeline with `-profile crukmi`. This will download and launch the [`crukmi.config`](../conf/crukmi.config) which has been pre-configured with a setup suitable for the phoenix HPC. 
Using this profile, singularity images will be downloaded to run on the cluster. -Before running the pipeline you will need to follow the internal documentation to run Nextflow on our systems. Similar to that, you need to set an environment variable `NXF_GLOBAL_CONFIG` to the path of the internal global config which is not publicly available here. +Before running the pipeline you will need to load Nextflow using the environment module system, for example via: -> NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. +```bash +## Load Nextflow and Singularity environment modules +module purge +module load apps/nextflow/22.04.5 +``` + +The pipeline should always be executed inside a workspace on the `/scratch/` system. All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory. \ No newline at end of file From d0662a3acbd104f98e216ea9eeba438e1122a75d Mon Sep 17 00:00:00 2001 From: SPearce Date: Mon, 15 Aug 2022 09:33:30 +0100 Subject: [PATCH 03/12] Ran prettier --- docs/crukmi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/crukmi.md b/docs/crukmi.md index 609a840..91dff58 100644 --- a/docs/crukmi.md +++ b/docs/crukmi.md @@ -12,4 +12,4 @@ module purge module load apps/nextflow/22.04.5 ``` -The pipeline should always be executed inside a workspace on the `/scratch/` system. All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory. 
\ No newline at end of file +The pipeline should always be executed inside a workspace on the `/scratch/` system. All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory. From 8ae3cba5e3e5466f6d9f42eadaf62b0911c821e9 Mon Sep 17 00:00:00 2001 From: SPearce Date: Mon, 15 Aug 2022 11:35:59 +0100 Subject: [PATCH 04/12] Reduce MPILEUP to 5GB --- conf/crukmi.config | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/conf/crukmi.config b/conf/crukmi.config index 3e7383b..f73c252 100644 --- a/conf/crukmi.config +++ b/conf/crukmi.config @@ -18,6 +18,10 @@ process { beforeScript = 'module load apps/singularity/3.8.0' executor = 'pbs' + errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} + maxErrors = '-1' + maxRetries = 3 + withLabel:process_low { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 5.GB * task.attempt, 'memory' ) } @@ -35,7 +39,7 @@ process { withName: 'SAMTOOLS_MPILEUP' { cpus = 1 - memory = { 8.GB * task.attempt } + memory = { 5.GB * task.attempt } } From 1e1ca1e96b2cd3330949d10200eb352ab6470271 Mon Sep 17 00:00:00 2001 From: SPearce Date: Wed, 31 Aug 2022 15:35:57 +0100 Subject: [PATCH 05/12] Incorporating executor options --- conf/crukmi.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conf/crukmi.config b/conf/crukmi.config index f73c252..000eda9 100644 --- a/conf/crukmi.config +++ b/conf/crukmi.config @@ -42,7 +42,12 @@ process { memory = { 5.GB * task.attempt } } +} +executor { + name = 'pbs' + queueSize = 1000 + pollInterval = '10 sec' } params { From 897d0a188b4e2bacba1dee5ff9345d4fef040506 Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 10:32:43 +0100 Subject: [PATCH 
06/12] Update nfcore_custom.config Co-authored-by: Maxime U. Garcia --- nfcore_custom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nfcore_custom.config b/nfcore_custom.config index fa26f03..fae764b 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -31,7 +31,7 @@ profiles { cheaha { includeConfig "${params.custom_config_base}/conf/cheaha.config" } computerome { includeConfig "${params.custom_config_base}/conf/computerome.config" } crick { includeConfig "${params.custom_config_base}/conf/crick.config" } - crukmi { includeConfig "${params.custom_config_base}/conf/crukmi.config" } + crukmi { includeConfig "${params.custom_config_base}/conf/crukmi.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } denbi_qbic { includeConfig "${params.custom_config_base}/conf/denbi_qbic.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } From ba1d3f24cc2dfff418ab9d88920ca753c99aa9aa Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:07:14 +0100 Subject: [PATCH 07/12] Update crukmi.config --- conf/crukmi.config | 5 ----- 1 file changed, 5 deletions(-) diff --git a/conf/crukmi.config b/conf/crukmi.config index 000eda9..4823585 100644 --- a/conf/crukmi.config +++ b/conf/crukmi.config @@ -37,11 +37,6 @@ process { memory = { check_max( 80.GB * task.attempt, 'memory' ) } } - withName: 'SAMTOOLS_MPILEUP' { - cpus = 1 - memory = { 5.GB * task.attempt } - } - } executor { From 800931bff22fb4c466cf41ae0fe8ea60e2f5085f Mon Sep 17 00:00:00 2001 From: SPearce Date: Thu, 1 Sep 2022 11:13:23 +0100 Subject: [PATCH 08/12] Add Sarek specific config --- conf/pipeline/sarek/crukmi.config | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 conf/pipeline/sarek/crukmi.config diff --git a/conf/pipeline/sarek/crukmi.config b/conf/pipeline/sarek/crukmi.config new file mode 100644 index 0000000..0b2aecd 
--- /dev/null +++ b/conf/pipeline/sarek/crukmi.config @@ -0,0 +1,33 @@ +// Profile config names for nf-core/configs + +params { + // Specific nf-core/configs params + config_profile_description = 'Cancer Research UK Manchester Institute HPC cluster profile provided by nf-core/configs' + config_profile_contact = 'Stephen Kitcatt, Simon Pearce (@skitcattCRUKMI, @sppearce)' + config_profile_url = 'http://scicom.picr.man.ac.uk/projects/user-support/wiki' +} + +// Specific nf-core/sarek process configuration +process { + + withName: 'SAMTOOLS_MPILEUP' { + cpus = 1 + memory = { check_resource( 5.GB * task.attempt) } + } + +} + +def check_resource(obj) { + try { + if (obj.getClass() == nextflow.util.MemoryUnit && obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else if (obj.getClass() == nextflow.util.Duration && obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else if (obj.getClass() == java.lang.Integer) + return Math.min(obj, params.max_cpus as int) + else + return obj + } catch (all) { + println " ### ERROR ### Max params max_memory:'${params.max_memory}', max_time:'${params.max_time}' or max_cpus:'${params.max_cpus}' is not valid! 
Using default value: $obj" + } +} From a8faeda14130ada4323121e6e40ee9ad34b8bdd8 Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:23:07 +0100 Subject: [PATCH 09/12] Update crukmi.config --- conf/pipeline/sarek/crukmi.config | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/conf/pipeline/sarek/crukmi.config b/conf/pipeline/sarek/crukmi.config index 0b2aecd..66a0b85 100644 --- a/conf/pipeline/sarek/crukmi.config +++ b/conf/pipeline/sarek/crukmi.config @@ -16,18 +16,3 @@ process { } } - -def check_resource(obj) { - try { - if (obj.getClass() == nextflow.util.MemoryUnit && obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else if (obj.getClass() == nextflow.util.Duration && obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else if (obj.getClass() == java.lang.Integer) - return Math.min(obj, params.max_cpus as int) - else - return obj - } catch (all) { - println " ### ERROR ### Max params max_memory:'${params.max_memory}', max_time:'${params.max_time}' or max_cpus:'${params.max_cpus}' is not valid! 
Using default value: $obj" - } -} From 2c738cae33d2f95ffeb3b95002dde93d004ab1ea Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:24:11 +0100 Subject: [PATCH 10/12] Update crukmi.config --- conf/pipeline/sarek/crukmi.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/pipeline/sarek/crukmi.config b/conf/pipeline/sarek/crukmi.config index 66a0b85..46c734b 100644 --- a/conf/pipeline/sarek/crukmi.config +++ b/conf/pipeline/sarek/crukmi.config @@ -12,7 +12,7 @@ process { withName: 'SAMTOOLS_MPILEUP' { cpus = 1 - memory = { check_resource( 5.GB * task.attempt) } + memory = { 5.GB * task.attempt } } } From 5981b643c5c97f08963633f3497f238be0fa4b06 Mon Sep 17 00:00:00 2001 From: SPearce Date: Thu, 1 Sep 2022 11:47:12 +0100 Subject: [PATCH 11/12] Updated the sarek pipeline config --- pipeline/sarek.config | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 512541e..12676b2 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -9,10 +9,11 @@ */ profiles { - munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" } - uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" } - icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } + crukmi { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/crukmi.config" } eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } + icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } + munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" } + uppmax { includeConfig 
"${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" } } From a142ad7eaeab8fab0732a663aae49cd1086bdf3c Mon Sep 17 00:00:00 2001 From: SPearce Date: Thu, 1 Sep 2022 13:55:08 +0100 Subject: [PATCH 12/12] Added link to Sarek specific documentation --- README.md | 1 + docs/pipeline/sarek/crukmi.md | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 docs/pipeline/sarek/crukmi.md diff --git a/README.md b/README.md index 18d8edbf..445467b 100644 --- a/README.md +++ b/README.md @@ -207,6 +207,7 @@ Currently documentation is available for the following pipelines within specific - rnavar - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek + - [Cancer Research UK Manchester Institute](docs/pipeline/sarek/crukmi.md) - [MUNIN](docs/pipeline/sarek/munin.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler diff --git a/docs/pipeline/sarek/crukmi.md b/docs/pipeline/sarek/crukmi.md new file mode 100644 index 0000000..01d030a --- /dev/null +++ b/docs/pipeline/sarek/crukmi.md @@ -0,0 +1,17 @@ +# nf-core/configs: CRUK-MI sarek specific configuration + +Extra specific configuration for the sarek pipeline + +## Usage + +To use, run the pipeline with `-profile crukmi`. + +This will download and launch the sarek specific [`crukmi.config`](../../../conf/pipeline/sarek/crukmi.config) which has been pre-configured with a setup suitable for the Cancer Research UK Manchester Institute cluster (phoenix). + +Example: `nextflow run nf-core/sarek -profile crukmi` + +## Sarek specific configurations for CRUK-MI + +Specific configurations for `CRUK-MI` have been made for sarek. + +- Initial requested resources for SAMTOOLS_MPILEUP are only 5GB and 1 core.