From afe935c2d04c3950b01feb0613f508164c652005 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 15 Apr 2022 15:27:41 +0200 Subject: [PATCH 01/91] include stub profile --- conf/hasta.config | 22 +++++++++++++++++++--- conf/pipeline/raredisease/hasta.config | 21 +++++++++++++++++++-- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/conf/hasta.config b/conf/hasta.config index c5dd5e4..d12cc40 100644 --- a/conf/hasta.config +++ b/conf/hasta.config @@ -15,19 +15,35 @@ singularity { params { max_memory = 180.GB max_cpus = 36 - max_time = 336.h + max_time = 336.h } process { executor = 'slurm' - clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''}" } + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''}" } +} + +params { + max_memory = 180.GB + max_cpus = 36 + max_time = 336.h } profiles { - dev_prio { + stub_prio { params { priority = 'development' clusterOptions = "--qos=low" + max_memory = 1.GB + max_cpus = 1 + max_time = 1.h + } + } + + dev_prio { + params { + priority = 'development' + clusterOptions = "--qos=low" } } diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 96e3332..9d6e0c9 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -1,5 +1,5 @@ process { - + withName:'PICARD_MARKDUPLICATES' { memory = { check_max( 90.GB * task.attempt, 'memory' ) } } @@ -7,4 +7,21 @@ process { cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 80.GB * task.attempt, 'memory' ) } } -} \ No newline at end of file + withname: BCFTOOLS_VIEW { + if (params.genome == 'GRCh37') { + ext.args = '--output-type v --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "' + } else if (params.genome == 'GRCh38') { + ext.args = '--output-type v --apply-filters PASS --exclude "INFO/swegenAF > 0.40"' + } + publishDir = [ + enabled: false, + ] + } + +} + +params { + + sentieon_install_dir = '/home/proj/bin/sentieon/sentieon-genomics-202010.02/bin' + +} From 6fad232232c5fff70fc849092f7f9ba1374c6f39 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 19 Apr 2022 11:27:35 +0200 Subject: [PATCH 02/91] sentieon env variable --- conf/pipeline/raredisease/hasta.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 9d6e0c9..7beb3ba 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -22,6 +22,6 @@ process { params { - sentieon_install_dir = '/home/proj/bin/sentieon/sentieon-genomics-202010.02/bin' + sentieon_install_dir = "$SENTIEON_INSTALL_DIR/bin" } From 32207a240273e86665c8ed072b3ec3ee358fdde1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 19 Apr 2022 11:42:05 +0200 Subject: [PATCH 03/91] update path so that the sentieon version is explicit --- conf/pipeline/raredisease/hasta.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 7beb3ba..56c1a97 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -22,6 +22,6 @@ process { params { - sentieon_install_dir = "$SENTIEON_INSTALL_DIR/bin" + 
sentieon_install_dir = "$SENTIEON_INSTALL_DIR/sentieon-genomics-202010.02/bin" } From d39bec05dcf0016d720ea73744283150c9e410c1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 19 Apr 2022 13:21:51 +0200 Subject: [PATCH 04/91] fix typo --- conf/pipeline/raredisease/hasta.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 56c1a97..4e00c56 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -7,11 +7,11 @@ process { cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 80.GB * task.attempt, 'memory' ) } } - withname: BCFTOOLS_VIEW { + withName: BCFTOOLS_VIEW { if (params.genome == 'GRCh37') { ext.args = '--output-type v --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "' } else if (params.genome == 'GRCh38') { - ext.args = '--output-type v --apply-filters PASS --exclude "INFO/swegenAF > 0.40"' + ext.args = '--output-type v --apply-filters PASS --exclude "INFO/swegen_FRQ > 0.40"' } publishDir = [ enabled: false, From 3e3e2be04beda1477a26b53f94d9165aee9147d2 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 22 Apr 2022 00:09:29 +0200 Subject: [PATCH 05/91] remove duplicate entries --- conf/hasta.config | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/conf/hasta.config b/conf/hasta.config index d12cc40..03e61b8 100644 --- a/conf/hasta.config +++ b/conf/hasta.config @@ -23,18 +23,12 @@ process { clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''}" } } -params { - max_memory = 180.GB - max_cpus = 36 - max_time = 336.h -} - profiles { stub_prio { params { priority = 'development' clusterOptions = "--qos=low" - max_memory = 1.GB + max_memory = 6.GB max_cpus = 1 max_time = 1.h } From 447b3ee05d5a696a39d222d1b1c703a2e77725a8 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 22 Apr 2022 11:03:09 +0200 Subject: [PATCH 06/91] add beforescript for sentieon --- conf/pipeline/raredisease/hasta.config | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 4e00c56..18c7bab 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -7,6 +7,12 @@ process { cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 80.GB * task.attempt, 'memory' ) } } + withLabel:'sentieon' { + beforeScript = { [ + "export SENTIEON_LICENSE=\$(echo -e \$SENTIEON_LICENSE_BASE64 | base64 -d)", + "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202010.02/bin" + ].join(";") } + } withName: BCFTOOLS_VIEW { if (params.genome == 'GRCh37') { ext.args = '--output-type v --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "' @@ -20,8 +26,3 @@ process { } -params { - - sentieon_install_dir = "$SENTIEON_INSTALL_DIR/sentieon-genomics-202010.02/bin" - -} From 9da85209f234e10d8e84b11d9bd7ee39a2a2fc4f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 22 Apr 2022 11:05:56 +0200 Subject: [PATCH 07/91] change bcftools output format --- 
conf/pipeline/raredisease/hasta.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 18c7bab..1359522 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -15,9 +15,9 @@ process { } withName: BCFTOOLS_VIEW { if (params.genome == 'GRCh37') { - ext.args = '--output-type v --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "' + ext.args = '--output-type z --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "' } else if (params.genome == 'GRCh38') { - ext.args = '--output-type v --apply-filters PASS --exclude "INFO/swegen_FRQ > 0.40"' + ext.args = '--output-type z --apply-filters PASS --exclude "INFO/swegen_FRQ > 0.40"' } publishDir = [ enabled: false, From 40c965c76b612925d69087976644c1c0243ba46c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 29 Apr 2022 00:20:57 +0200 Subject: [PATCH 08/91] fix sentieon errors --- conf/pipeline/raredisease/hasta.config | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 1359522..fb4d94a 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -8,10 +8,7 @@ process { memory = { check_max( 80.GB * task.attempt, 'memory' ) } } withLabel:'sentieon' { - beforeScript = { [ - "export SENTIEON_LICENSE=\$(echo -e \$SENTIEON_LICENSE_BASE64 | base64 -d)", - "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202010.02/bin" - ].join(";") } + beforeScript = { "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202010.02/bin" } } withName: BCFTOOLS_VIEW { if (params.genome == 'GRCh37') { From d8c27759761232798ce6a295a98426b66adb39ee Mon Sep 17 00:00:00 2001 From: Andrew Frank Date: Tue, 7 Jun 2022 09:41:41 -0400 Subject: [PATCH 09/91] Update google.config to fix custom VM memory error --- conf/google.config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conf/google.config b/conf/google.config index 41f0e1f..6e8a45a 100644 --- a/conf/google.config +++ b/conf/google.config @@ -19,4 +19,6 @@ google.lifeSciences.preemptible = params.google_preemptible if (google.lifeSciences.preemptible) { process.errorStrategy = { task.exitStatus in [8,10,14] ? 'retry' : 'terminate' } process.maxRetries = 5 -} \ No newline at end of file +} + +process.machineType = { task.memory > task.cpus * 6.GB ? 
['custom', task.cpus, task.cpus * 6656].join('-') : null } From 2898736cbe223c19d0cc09c1e779783cd6ccc942 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 8 Jun 2022 14:16:52 -0700 Subject: [PATCH 10/91] Add Sage Bionetworks nf-core configuration --- .github/workflows/main.yml | 1 + README.md | 1 + conf/sage.config | 58 ++++++++++++++++++++++++++++++++++++++ docs/sage.md | 24 ++++++++++++++++ nfcore_custom.config | 1 + 5 files changed, 85 insertions(+) create mode 100644 conf/sage.config create mode 100644 docs/sage.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac31dfe..c142eb3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,6 +77,7 @@ jobs: - "phoenix" - "prince" - "rosalind" + - "sage" - "sahmri" - "sanger" - "seg_globe" diff --git a/README.md b/README.md index 1f37d29..978e606 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ Currently documentation is available for the following systems: - [PHOENIX](docs/phoenix.md) - [PRINCE](docs/prince.md) - [ROSALIND](docs/rosalind.md) +- [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) - [SEG_GLOBE](docs/seg_globe.md) - [UCT_HPC](docs/uct_hpc.md) diff --git a/conf/sage.config b/conf/sage.config new file mode 100644 index 0000000..5581d85 --- /dev/null +++ b/conf/sage.config @@ -0,0 +1,58 @@ +params { + config_profile_description = 'The Sage Bionetworks profile' + config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' + config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' +} + +process { + + cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' } + maxRetries = 5 + maxErrors = '-1' + + // Process-specific resource requirements + withLabel:process_low { + cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 96.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 192.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + +} + +aws { + region = "us-east-1" +} + +params { + igenomes_base = 's3://sage-igenomes/igenomes' + max_memory = 512.GB + max_cpus = 64 + max_time = 168.h // One week +} + +// Function to slow the increase of the resource multipler +// as attempts are made. The rationale is that some CPUs +// don't need to be increased as fast as memory. +def slow(attempt, factor = 2) { + return Math.ceil( attempt / factor) as int +} diff --git a/docs/sage.md b/docs/sage.md new file mode 100644 index 0000000..755e0c2 --- /dev/null +++ b/docs/sage.md @@ -0,0 +1,24 @@ +# nf-core/configs: Sage Bionetworks Configuration + +To use this custom configuration, run the pipeline with `-profile sage`. 
This will download and launch the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). These include: + +- Updating the default value for `igenomes_base` to `s3://sage-igenomes` +- Increasing the default time limits because we run pipelines on AWS +- Enabling retries by default when exit codes relate to insufficient memory +- Allow pending jobs to finish if the number of retries are exhausted +- Slowing the increase in the number of allocated CPU cores on retries + +## Additional information about iGenomes + +The following iGenomes prefixes have been copied from `s3://ngi-igenomes/` (`eu-west-1`) to `s3://sage-igenomes` (`us-east-1`). See [this script](https://github.com/Sage-Bionetworks-Workflows/nextflow-infra/blob/main/bin/mirror-igenomes.sh) for more information. The `sage-igenomes` S3 bucket has been configured to openly available, but files cannot be downloaded out of `us-east-1` to avoid egress charges. You can check the `conf/igenomes.config` file in each nf-core pipeline to figure out the mapping between genome IDs (_i.e._ for `--genome`) and iGenomes prefixes ([example](https://github.com/nf-core/rnaseq/blob/89bf536ce4faa98b4d50a8ec0a0343780bc62e0a/conf/igenomes.config#L14-L26)). + +- **Human Genome Builds** + - `Homo_sapiens/Ensembl/GRCh37` + - `Homo_sapiens/GATK/GRCh37` + - `Homo_sapiens/UCSC/hg19` + - `Homo_sapiens/GATK/GRCh38` + - `Homo_sapiens/NCBI/GRCh38` + - `Homo_sapiens/UCSC/hg38` +- **Mouse Genome Builds** + - `Mus_musculus/Ensembl/GRCm38` + - `Mus_musculus/UCSC/mm10` diff --git a/nfcore_custom.config b/nfcore_custom.config index 88e7227..4b44d8a 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -59,6 +59,7 @@ profiles { phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" } prince { includeConfig "${params.custom_config_base}/conf/prince.config" } rosalind { includeConfig "${params.custom_config_base}/conf/rosalind.config" } + sage { includeConfig "${params.custom_config_base}/conf/sage.config" } sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" } sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"} From 72e995f32678c8a94c0a466f91b61973363d528b Mon Sep 17 00:00:00 2001 From: Emil Bertilsson Date: Thu, 9 Jun 2022 10:28:20 +0200 Subject: [PATCH 11/91] Adds generic fix for java memory issue on slurm --- conf/pipeline/raredisease/hasta.config | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index a076dbc..c31b940 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -7,7 +7,13 @@ process { cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 80.GB * task.attempt, 'memory' ) } } + + // Java memory fixes withName:'QUALIMAP_BAMQC' { - ext.args = { "--java-mem-size=${task.memory.giga / 1.15 as long}G" } + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? "--mem ${task.memory.mega * 1.15 as long}M" : ''}" } } + withName:'PICARD_MARKDUPLICATES' { + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? 
"--mem ${task.memory.mega * 1.15 as long}M" : ''}" } + } + } \ No newline at end of file From c7093dee1306bd24bbdc6200097fcc4fccfda2d0 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 16 Jun 2022 12:37:32 -0700 Subject: [PATCH 12/91] Reduce max_memory to fit on an instance --- conf/sage.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/sage.config b/conf/sage.config index 5581d85..da1db72 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -45,7 +45,7 @@ aws { params { igenomes_base = 's3://sage-igenomes/igenomes' - max_memory = 512.GB + max_memory = 500.GB max_cpus = 64 max_time = 168.h // One week } From ecd898584dc30f42612b44634e618271550a7cfb Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 22 Jun 2022 13:54:13 -0700 Subject: [PATCH 13/91] Handle Sarek-specific `max` labels --- conf/sage.config | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/conf/sage.config b/conf/sage.config index da1db72..d2e9d21 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -34,7 +34,15 @@ process { time = { check_max( 192.h * task.attempt, 'time' ) } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + } + + // Preventing Sarek labels from using the actual maximums + withLabel:max_memory { + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + } + withLabel:cpus_max { + cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } } } From 37121faf4a6da6e3130defdf7eb00a0dd113010a Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 22 Jun 2022 14:37:43 -0700 Subject: [PATCH 14/91] Fix typoe with label name --- conf/sage.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/sage.config b/conf/sage.config index d2e9d21..3b2a423 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -38,7 +38,7 @@ process { } // Preventing Sarek labels from using the actual maximums - withLabel:max_memory { + withLabel:memory_max { memory = { check_max( 128.GB * task.attempt, 'memory' ) } } withLabel:cpus_max { From 1c7230cb08791b93bcca612836b5d68afade8b66 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 23 Jun 2022 09:15:42 -0700 Subject: [PATCH 15/91] Add Sage-specific Sarek config --- README.md | 2 +- conf/pipeline/sarek/sage.config | 37 +++++++++++++++++++++++++++++++++ docs/pipeline/sarek/sage.md | 7 +++++++ docs/sage.md | 14 +++++++------ pipeline/sarek.config | 3 ++- 5 files changed, 55 insertions(+), 8 deletions(-) create mode 100644 conf/pipeline/sarek/sage.config create mode 100644 docs/pipeline/sarek/sage.md diff --git a/README.md b/README.md index 978e606..0d561fc 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,6 @@ A repository for hosting Nextflow configuration files containing custom paramete - [Configuration and parameters](#configuration-and-parameters) - [Offline usage](#offline-usage) - [Adding a new config](#adding-a-new-config) - - [Checking user hostnames](#checking-user-hostnames) - [Testing](#testing) - [Documentation](#documentation) - [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) @@ -203,6 +202,7 @@ Currently documentation is available for the following pipelines within specific - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek - [MUNIN](docs/pipeline/sarek/munin.md) + - [SAGE BIONETWORKS](docs/pipeline/sarek/sage.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler - [EVA](docs/pipeline/taxprofiler/eva.md) diff --git a/conf/pipeline/sarek/sage.config 
b/conf/pipeline/sarek/sage.config new file mode 100644 index 0000000..b7766bc --- /dev/null +++ b/conf/pipeline/sarek/sage.config @@ -0,0 +1,37 @@ +params { + config_profile_description = 'The Sage Bionetworks profile' + config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' + config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' +} + +// Function to ensure that resource requirements don't go beyond a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} diff --git a/docs/pipeline/sarek/sage.md b/docs/pipeline/sarek/sage.md new file mode 100644 index 0000000..646fdda --- /dev/null +++ b/docs/pipeline/sarek/sage.md @@ -0,0 +1,7 @@ +# nf-core/configs: Sage Bionetworks Sarek-Specific Configuration + +To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration. + +In addition to the global configuration described [here](../../sage.md), this Sarek-specific configuration includes the following tweaks: + +- Define the `check_max()` function, which is missing in Sarek v2. diff --git a/docs/sage.md b/docs/sage.md index 755e0c2..5b0fa49 100644 --- a/docs/sage.md +++ b/docs/sage.md @@ -1,12 +1,14 @@ -# nf-core/configs: Sage Bionetworks Configuration +# nf-core/configs: Sage Bionetworks Global Configuration -To use this custom configuration, run the pipeline with `-profile sage`. This will download and launch the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). These include: +To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration. 
-- Updating the default value for `igenomes_base` to `s3://sage-igenomes` -- Increasing the default time limits because we run pipelines on AWS -- Enabling retries by default when exit codes relate to insufficient memory +This global configuration includes the following tweaks: + +- Update the default value for `igenomes_base` to `s3://sage-igenomes` +- Increase the default time limits because we run pipelines on AWS +- Enable retries by default when exit codes relate to insufficient memory - Allow pending jobs to finish if the number of retries are exhausted -- Slowing the increase in the number of allocated CPU cores on retries +- Slow the increase in the number of allocated CPU cores on retries ## Additional information about iGenomes diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 57d7bdf..36efc99 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,4 +15,5 @@ profiles { cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } -} \ No newline at end of file + sage { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/sage.config" } +} From a19d79f6ffcf1f7c20033e9247a21e2119a0941d Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 23 Jun 2022 09:44:16 -0700 Subject: [PATCH 16/91] Move `check_max()` to global config --- conf/sage.config | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/conf/sage.config b/conf/sage.config index 3b2a423..b559f79 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -64,3 +64,37 @@ params { def slow(attempt, factor = 2) { return Math.ceil( attempt / factor) as int } + + +// Function to ensure that resource requirements don't go +// beyond a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} From 6f0d9e6c43b8ee0373011f32d4dbf3c17811cd78 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 23 Jun 2022 10:10:28 -0700 Subject: [PATCH 17/91] Complete `check_max()` move to global config --- README.md | 1 - conf/pipeline/sarek/sage.config | 37 --------------------------------- conf/sage.config | 2 +- docs/pipeline/sarek/sage.md | 7 ------- docs/sage.md | 1 + pipeline/sarek.config | 1 - 6 files changed, 2 insertions(+), 47 deletions(-) delete mode 100644 conf/pipeline/sarek/sage.config delete mode 100644 docs/pipeline/sarek/sage.md diff --git a/README.md b/README.md index 0d561fc..d86461c 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,6 @@ Currently documentation is available for the following pipelines within specific - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek - [MUNIN](docs/pipeline/sarek/munin.md) - - [SAGE BIONETWORKS](docs/pipeline/sarek/sage.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler - [EVA](docs/pipeline/taxprofiler/eva.md) diff --git a/conf/pipeline/sarek/sage.config b/conf/pipeline/sarek/sage.config deleted file mode 100644 index b7766bc..0000000 --- a/conf/pipeline/sarek/sage.config +++ /dev/null @@ -1,37 +0,0 @@ -params { - config_profile_description = 'The Sage Bionetworks profile' - config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' - config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' -} - -// Function to ensure that resource requirements don't go beyond a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} diff --git a/conf/sage.config b/conf/sage.config index b559f79..e5bfa8b 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -67,7 +67,7 @@ def slow(attempt, factor = 2) { // Function to ensure that resource requirements don't go -// beyond a maximum limit +// beyond a maximum limit (copied here for Sarek v2) def check_max(obj, type) { if (type == 'memory') { try { diff --git a/docs/pipeline/sarek/sage.md b/docs/pipeline/sarek/sage.md deleted file mode 100644 index 646fdda..0000000 --- a/docs/pipeline/sarek/sage.md +++ /dev/null @@ -1,7 +0,0 @@ -# nf-core/configs: Sage Bionetworks Sarek-Specific Configuration - -To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration. 
- -In addition to the global configuration described [here](../../sage.md), this Sarek-specific configuration includes the following tweaks: - -- Define the `check_max()` function, which is missing in Sarek v2. diff --git a/docs/sage.md b/docs/sage.md index 5b0fa49..133ccec 100644 --- a/docs/sage.md +++ b/docs/sage.md @@ -9,6 +9,7 @@ This global configuration includes the following tweaks: - Enable retries by default when exit codes relate to insufficient memory - Allow pending jobs to finish if the number of retries are exhausted - Slow the increase in the number of allocated CPU cores on retries +- Define the `check_max()` function, which is missing in Sarek v2 ## Additional information about iGenomes diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 36efc99..512541e 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,5 +15,4 @@ profiles { cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } - sage { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/sage.config" } } From bb35d8e5d18f7d99b16f6f209b0e0ba589b6842a Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 11:42:36 +0200 Subject: [PATCH 18/91] Added conf file for medair --- conf/medair.config | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 conf/medair.config diff --git a/conf/medair.config b/conf/medair.config new file mode 100644 index 0000000..6cce6f9 --- /dev/null +++ b/conf/medair.config @@ -0,0 +1,39 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Cluster profile for medair (local cluster of Clinical Genomics Gothenburg)' + config_profile_contact = 'Clinical Genomics, Gothenburg' + config_profile_url = 'https://www.scilifelab.se/units/clinical-genomics-goteborg/' +} + +//Nextflow parameters +singularity { + enabled = true +} + +profiles { + + wgs { + process { + queue = 'wgs.q' + executor = 'sge' + penv = 'mpi' + process.clusterOptions = '-l excl=1' + params.max_cpus = 40 + params.max_time = 48.h + params.max_memory = 128.GB + } + } + + production { + process { + queue = 'production.q' + executor = 'sge' + penv = 'mpi' + process.clusterOptions = '-l excl=1' + params.max_cpus = 40 + params.max_time = 480.h + params.max_memory = 128.GB + } + } +} + From f718025e79a0e204c6339ce05a7c1915fcb2e66e Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 11:43:04 +0200 Subject: [PATCH 19/91] Added doc file for medair --- docs/medair.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 docs/medair.md diff --git a/docs/medair.md b/docs/medair.md new file mode 100644 index 0000000..3edcfc4 --- /dev/null +++ b/docs/medair.md @@ -0,0 +1,43 @@ +# nf-core/configs: Medair Configuration + +All nf-core pipelines have been successfully configured for use on the medair cluster at Clinical Genomics Gothenburg. + +To use, run the pipeline with `-profile medair`. This will download and launch the [`medair.config`](../conf/medair.config) which has been pre-configured with a setup suitable for the medair cluster. +It will enable Nextflow to manage the pipeline jobs via the `SGE` job scheduler. 
+Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +## Modules to load + +Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on medair. You can do this by issuing the commands below: + +```bash +## Load Nextflow and Singularity environment modules +module purge +module load nextflow/21.10.5.5658 +module load singularity/v3.4.0 +``` + +>Should we link to Confluence page about new modules? See something about updating modules? etc. +>Is this a good location to mention the nf-core conda environment? module load miniconda; source activate nf-core + +## Storage of Singularity images + +When downloading a new nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. A central location for these images is: `/apps/bio/dependencies/nf-core/singularities` + +Cached Singularity images can be accessed by running (or adding to your `.bashrc`) the following line: `export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities"` #is that correct?? + +## Different profiles depending on what you are running + +Depending on what you are running, you can choose between the `clinic`, `research`, `byss` and `qd_rnaseq` profiles. This specify for example which queue will be used. + +>NB: say more about the different queues? + +## iGenomes specific configuration + +>TODO modify this part. Do we want to have the iGenomes somewhere? + +A local copy of the iGenomes resource has been made available on PROFILE CLUSTER so you should be able to run the pipeline against any reference available in the `igenomes.config` specific to the nf-core pipeline. +You can do this by simply using the `--genome ` parameter. + +>NB: You will need an account to use the HPC cluster on PROFILE CLUSTER in order to run the pipeline. If in doubt contact IT. +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. 
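The `wgs` and `production` profiles in the new `conf/medair.config` above set `params.max_*` and `process.clusterOptions` from inside the `process` scope. The same limits can also be expressed with the `process` and `params` settings kept in separate scopes, the way the hasta profiles earlier in this series nest a `params { }` block inside each profile. The following is a minimal sketch of that alternative layout for the `wgs` profile, reusing the queue name and caps from the patch above; it is illustrative only, not the committed file:

```nextflow
profiles {
    wgs {
        process {
            executor       = 'sge'
            queue          = 'wgs.q'
            penv           = 'mpi'        // SGE parallel environment
            clusterOptions = '-l excl=1'  // request exclusive node access
        }
        params {
            max_cpus   = 40
            max_time   = 48.h
            max_memory = 128.GB
        }
    }
}
```
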
From 77017e877af4c1de90bb0d37ff52586dcb0f739b Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 11:48:34 +0200 Subject: [PATCH 20/91] Added medair configuration docs to the list --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d86461c..94ce55c 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ Currently documentation is available for the following systems: - [LUGH](docs/lugh.md) - [MAESTRO](docs/maestro.md) - [MARVIN](docs/marvin.md) +- [MEDAIR](docs/medair.md) - [MJOLNIR_GLOBE](docs/mjolnir_globe.md) - [MPCDF](docs/mpcdf.md) - [MUNIN](docs/munin.md) From abccece8a38b8b18daebae8d6b8a82e21be5cc4d Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 11:49:47 +0200 Subject: [PATCH 21/91] Added medair config file to the list --- nfcore_custom.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nfcore_custom.config b/nfcore_custom.config index 4b44d8a..6f0ac6c 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -49,6 +49,7 @@ profiles { lugh { includeConfig "${params.custom_config_base}/conf/lugh.config" } maestro { includeConfig "${params.custom_config_base}/conf/maestro.config" } marvin { includeConfig "${params.custom_config_base}/conf/marvin.config" } + medair { includeConfig "${params.custom_config_base}/conf/medair.config" } mjolnir_globe { includeConfig "${params.custom_config_base}/conf/mjolnir_globe.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/mpcdf.config" } munin { includeConfig "${params.custom_config_base}/conf/munin.config" } From 77dba3256aa4b948dad9bad16036f8a621eff98b Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 11:53:02 +0200 Subject: [PATCH 22/91] Add profile name to test scope --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c142eb3..f5ba0d1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -66,6 +66,7 @@ jobs: - "jax" - "lugh" - "marvin" + - "medair" - "mjolnir_globe" - "maestro" - "mpcdf" From 22f73b06f9da47ebc1c96bf14dabecc4918be938 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 12:00:05 +0200 Subject: [PATCH 23/91] Updated info about the different profiles --- docs/medair.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 3edcfc4..834ac1d 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -28,9 +28,7 @@ Cached Singularity images can be accessed by running (or adding to your `.bashrc ## Different profiles depending on what you are running -Depending on what you are running, you can choose between the `clinic`, `research`, `byss` and `qd_rnaseq` profiles. This specify for example which queue will be used. - ->NB: say more about the different queues? +Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 times, versus 2 days for the `wgs` profile). 
## iGenomes specific configuration From 2398110ea54f5f07ddecf5c6b08c52a1f308d3c6 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 14:24:17 +0200 Subject: [PATCH 24/91] Added info on dowloading pipelines --- docs/medair.md | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 834ac1d..9f2b51f 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -6,9 +6,43 @@ To use, run the pipeline with `-profile medair`. This will download and launch t It will enable Nextflow to manage the pipeline jobs via the `SGE` job scheduler. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. -## Modules to load +## Download nf-core pipelines -Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on medair. You can do this by issuing the commands below: +### Set-up + +First you need to load relevant softwares: Nextflow and nf-core tools. You can do it as follow: + +```bash +## Load Nextflow +module load nextflow/21.10.5.5658 +## Load nf-core tools +module load miniconda +source activate nf-core +``` + +### Download a pipeline + +We have started to download pipelines in the following location: `/apps/bio/repos/nf-core/` + +Use the `nf-core download --singularity-cache-only` command to start a downlonad. It will open an interactive menu. Choose `singularity` and `none` for the compression level. + +### Storage of Singularity images + +When downloading a new nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. A central location for these images is: `/apps/bio/dependencies/nf-core/singularities` + +Cached Singularity images can be accessed by running (or adding to your `.bashrc`) the following: + +``` +export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities" +``` + +This was also added to cronuser. + +## Run nf-core pipelines + +### Set-up + +Before running a pipeline you will need to load Nextflow and Singularity using the environment module system on medair. You can do this by issuing the commands below: ```bash ## Load Nextflow and Singularity environment modules @@ -18,18 +52,13 @@ module load singularity/v3.4.0 ``` >Should we link to Confluence page about new modules? See something about updating modules? etc. ->Is this a good location to mention the nf-core conda environment? module load miniconda; source activate nf-core -## Storage of Singularity images - -When downloading a new nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. A central location for these images is: `/apps/bio/dependencies/nf-core/singularities` - -Cached Singularity images can be accessed by running (or adding to your `.bashrc`) the following line: `export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities"` #is that correct?? - -## Different profiles depending on what you are running +### Choose a profile Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 times, versus 2 days for the `wgs` profile). +>Usage: -profile medair,wgs ?? 
(Check) + ## iGenomes specific configuration >TODO modify this part. Do we want to have the iGenomes somewhere? From 84174043e4437338c1e11b218b71763de396bcd5 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 30 Jun 2022 16:41:50 +0200 Subject: [PATCH 25/91] Added details for nf-core pipelines download --- docs/medair.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/medair.md b/docs/medair.md index 9f2b51f..727becb 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -24,7 +24,7 @@ source activate nf-core We have started to download pipelines in the following location: `/apps/bio/repos/nf-core/` -Use the `nf-core download --singularity-cache-only` command to start a downlonad. It will open an interactive menu. Choose `singularity` and `none` for the compression level. +Use the `nf-core download --singularity-cache-only` command to start a download. It will open an interactive menu. Choose `singularity` for the software container image, and `none` for the compression type. ### Storage of Singularity images From abfa3f47f4a67884d4af82603b3ba886c23b054d Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 1 Jul 2022 15:55:44 +0200 Subject: [PATCH 26/91] lowered the job submit rate of vsc_ugent --- conf/vsc_ugent.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index f2558fc..169faab 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -7,9 +7,9 @@ workDir = "$scratch_dir/work" // Perform work directory cleanup when the run has succesfully completed // cleanup = true -// Reduce the job submit rate to about 10 per second, this way the server won't be bombarded with jobs +// Reduce the job submit rate to about 5 per second, this way the server won't be bombarded with jobs executor { - submitRateLimit = '10 sec' + submitRateLimit = '5 sec' } // Specify that singularity should be used and where the cache dir will be for the images From fa949a069ef99aa002ce18d24b169f095e2377d7 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 1 Jul 2022 16:01:19 +0200 Subject: [PATCH 27/91] added queuesize --- conf/vsc_ugent.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index 169faab..9837a54 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -10,6 +10,7 @@ workDir = "$scratch_dir/work" // Reduce the job submit rate to about 5 per second, this way the server won't be bombarded with jobs executor { submitRateLimit = '5 sec' + queueSize = 50 } // Specify that singularity should be used and where the cache dir will be for the images From 1e3459528053d41f18b6df38152b806ae138916b Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 1 Jul 2022 16:16:18 +0200 Subject: [PATCH 28/91] removed queue size limit --- conf/vsc_ugent.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index 9837a54..169faab 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -10,7 +10,6 @@ workDir = "$scratch_dir/work" // Reduce the job submit rate to about 5 per second, this way the server won't be bombarded with jobs executor { submitRateLimit = '5 sec' - queueSize = 50 } // Specify that singularity should be used and where the cache dir will be for the images From b202ad70ac95bfecd72256e809ac8e734df4debd Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Wed, 6 Jul 2022 11:16:38 +0200 Subject: [PATCH 29/91] Removed mention of specific module versions --- 
docs/medair.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 727becb..7be38e6 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -14,7 +14,7 @@ First you need to load relevant softwares: Nextflow and nf-core tools. You can d ```bash ## Load Nextflow -module load nextflow/21.10.5.5658 +module load nextflow ## Load nf-core tools module load miniconda source activate nf-core @@ -47,8 +47,8 @@ Before running a pipeline you will need to load Nextflow and Singularity using t ```bash ## Load Nextflow and Singularity environment modules module purge -module load nextflow/21.10.5.5658 -module load singularity/v3.4.0 +module load nextflow +module load singularity ``` >Should we link to Confluence page about new modules? See something about updating modules? etc. From 1d7a34b11f3a657e3db0bc1f2f95a330ee7165a3 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Wed, 6 Jul 2022 11:17:58 +0200 Subject: [PATCH 30/91] Removed comment about internal doc --- docs/medair.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 7be38e6..dd2b9aa 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -51,8 +51,6 @@ module load nextflow module load singularity ``` ->Should we link to Confluence page about new modules? See something about updating modules? etc. - ### Choose a profile Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 times, versus 2 days for the `wgs` profile). From 235097e58e545c182bd9d02b00a5e424f150b7bf Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Wed, 6 Jul 2022 11:20:59 +0200 Subject: [PATCH 31/91] Addressed review comments - removed mention of iGenomes - clarified whom to contact for support --- docs/medair.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index dd2b9aa..0c54c81 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -57,12 +57,5 @@ Depending on what you are running, you can choose between the `wgs` and `product >Usage: -profile medair,wgs ?? (Check) -## iGenomes specific configuration - ->TODO modify this part. Do we want to have the iGenomes somewhere? - -A local copy of the iGenomes resource has been made available on PROFILE CLUSTER so you should be able to run the pipeline against any reference available in the `igenomes.config` specific to the nf-core pipeline. -You can do this by simply using the `--genome ` parameter. - ->NB: You will need an account to use the HPC cluster on PROFILE CLUSTER in order to run the pipeline. If in doubt contact IT. ->NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. +>NB: You will need an account to use the HPC cluster on PROFILE CLUSTER in order to run the pipeline. If in doubt contact cgg-it. +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact cgg-it. 
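The `max_cpus`, `max_time` and `max_memory` values set by the profiles in this series (hasta, sage, medair) act as caps rather than requests: nf-core pipelines pass every per-process resource request through the `check_max()` helper that the Sage patches above copy into `conf/sage.config`. As an illustration of how a pipeline consumes those caps, here is a sketch of a typical labelled process definition using the nf-core template defaults; the values are not taken from any patch in this series:

```nextflow
process {
    withLabel: process_medium {
        // Each request grows with the retry attempt, then check_max() clamps it
        // to params.max_cpus / params.max_memory / params.max_time.
        cpus   = { check_max( 6     * task.attempt, 'cpus'   ) }
        memory = { check_max( 36.GB * task.attempt, 'memory' ) }
        time   = { check_max( 8.h   * task.attempt, 'time'   ) }
    }
}
```
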
From 1d13dd9fbce6b3c8a7169e9d4f7be51ac717e49a Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Wed, 6 Jul 2022 11:24:03 +0200 Subject: [PATCH 32/91] Changed where cgg-it is mentioned --- docs/medair.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 0c54c81..96c44e3 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -6,6 +6,8 @@ To use, run the pipeline with `-profile medair`. This will download and launch t It will enable Nextflow to manage the pipeline jobs via the `SGE` job scheduler. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. +You will need an account to use the Medair cluster in order to download or run pipelines. If in doubt, contact cgg-it. + ## Download nf-core pipelines ### Set-up @@ -40,6 +42,8 @@ This was also added to cronuser. ## Run nf-core pipelines +Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands below will have to be executed on one of the login nodes. If in doubt contact cgg-it. + ### Set-up Before running a pipeline you will need to load Nextflow and Singularity using the environment module system on medair. You can do this by issuing the commands below: @@ -56,6 +60,3 @@ module load singularity Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 times, versus 2 days for the `wgs` profile). >Usage: -profile medair,wgs ?? (Check) - ->NB: You will need an account to use the HPC cluster on PROFILE CLUSTER in order to run the pipeline. If in doubt contact cgg-it. ->NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact cgg-it. From 96fd2bf217b71c73f07552857c70569dbbcb4c55 Mon Sep 17 00:00:00 2001 From: Gwennid Date: Wed, 6 Jul 2022 11:42:05 +0200 Subject: [PATCH 33/91] Fixed formatting --- docs/medair.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 96c44e3..6185f64 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -1,8 +1,8 @@ # nf-core/configs: Medair Configuration -All nf-core pipelines have been successfully configured for use on the medair cluster at Clinical Genomics Gothenburg. +All nf-core pipelines have been successfully configured for use on the Medair cluster at Clinical Genomics Gothenburg. -To use, run the pipeline with `-profile medair`. This will download and launch the [`medair.config`](../conf/medair.config) which has been pre-configured with a setup suitable for the medair cluster. +To use, run the pipeline with `-profile medair`. This will download and launch the [`medair.config`](../conf/medair.config) which has been pre-configured with a setup suitable for the Medair cluster. It will enable Nextflow to manage the pipeline jobs via the `SGE` job scheduler. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. 
@@ -10,7 +10,7 @@ You will need an account to use the Medair cluster in order to download or run p ## Download nf-core pipelines -### Set-up +### Set-up: load Nextflow and nf-core tools First you need to load relevant softwares: Nextflow and nf-core tools. You can do it as follow: @@ -32,9 +32,9 @@ Use the `nf-core download --singularity-cache-only` command to start a download. When downloading a new nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. A central location for these images is: `/apps/bio/dependencies/nf-core/singularities` -Cached Singularity images can be accessed by running (or adding to your `.bashrc`) the following: +Cached Singularity images can be accessed by running (or adding to your `.bashrc`) the following: -``` +```bash export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities" ``` @@ -44,9 +44,9 @@ This was also added to cronuser. Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands below will have to be executed on one of the login nodes. If in doubt contact cgg-it. -### Set-up +### Set-up: load Nextflow and Singularity -Before running a pipeline you will need to load Nextflow and Singularity using the environment module system on medair. You can do this by issuing the commands below: +Before running a pipeline you will need to load Nextflow and Singularity using the environment module system on Medair. You can do this by issuing the commands below: ```bash ## Load Nextflow and Singularity environment modules @@ -59,4 +59,4 @@ module load singularity Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 times, versus 2 days for the `wgs` profile). ->Usage: -profile medair,wgs ?? (Check) +> Usage: -profile medair,wgs ?? (Check) From b6e2d03a02938b2eb6a1aff451140b43bf1835fa Mon Sep 17 00:00:00 2001 From: Gwennid Date: Wed, 6 Jul 2022 13:59:35 +0200 Subject: [PATCH 34/91] Added Singularity cache dir --- conf/medair.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/medair.config b/conf/medair.config index 6cce6f9..183d4f6 100644 --- a/conf/medair.config +++ b/conf/medair.config @@ -8,6 +8,7 @@ params { //Nextflow parameters singularity { enabled = true + cacheDir = "/apps/bio/dependencies/nf-core/singularities" } profiles { From 746db14d4745ad3369d12d09b3b056db55c787f9 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 7 Jul 2022 11:42:46 +0200 Subject: [PATCH 35/91] Update medair.md - updated text about storing singularity images - updated text about using profiles --- docs/medair.md | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 6185f64..5eb1c46 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -22,23 +22,23 @@ module load miniconda source activate nf-core ``` -### Download a pipeline - -We have started to download pipelines in the following location: `/apps/bio/repos/nf-core/` - -Use the `nf-core download --singularity-cache-only` command to start a download. It will open an interactive menu. Choose `singularity` for the software container image, and `none` for the compression type. 
- ### Storage of Singularity images -When downloading a new nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. A central location for these images is: `/apps/bio/dependencies/nf-core/singularities` +When downloading a nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. A central location for these images is: `/apps/bio/dependencies/nf-core/singularities`. -Cached Singularity images can be accessed by running (or adding to your `.bashrc`) the following: +If you run or add the following to your `.bashrc`, Nexflow will know where to store the images: ```bash export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities" ``` -This was also added to cronuser. +> Comment: This was also added to cronuser. + +### Download a pipeline + +We have started to download pipelines in the following location: `/apps/bio/repos/nf-core/` + +Use the `nf-core download --singularity-cache-only` command to start a download. It will open an interactive menu. Choose `singularity` for the software container image, and `none` for the compression type. ## Run nf-core pipelines @@ -59,4 +59,8 @@ module load singularity Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 times, versus 2 days for the `wgs` profile). -> Usage: -profile medair,wgs ?? (Check) +For example, the following job would run with the `wgs` profile: + +```bash +run nextflow nf-core/raredisease -profile medair,wgs +``` From 4538e8aa36d71b06cbf46a68f82596f513b1c506 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 7 Jul 2022 13:41:42 +0200 Subject: [PATCH 36/91] Update medair.config - added path to singularity image for Sentieon - harmonized alignment --- conf/medair.config | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/conf/medair.config b/conf/medair.config index 183d4f6..eeda705 100644 --- a/conf/medair.config +++ b/conf/medair.config @@ -1,13 +1,13 @@ //Profile config names for nf-core/configs params { config_profile_description = 'Cluster profile for medair (local cluster of Clinical Genomics Gothenburg)' - config_profile_contact = 'Clinical Genomics, Gothenburg' - config_profile_url = 'https://www.scilifelab.se/units/clinical-genomics-goteborg/' + config_profile_contact = 'Clinical Genomics, Gothenburg' + config_profile_url = 'https://www.scilifelab.se/units/clinical-genomics-goteborg/' } //Nextflow parameters singularity { - enabled = true + enabled = true cacheDir = "/apps/bio/dependencies/nf-core/singularities" } @@ -15,26 +15,32 @@ profiles { wgs { process { - queue = 'wgs.q' - executor = 'sge' - penv = 'mpi' + queue = 'wgs.q' + executor = 'sge' + penv = 'mpi' process.clusterOptions = '-l excl=1' - params.max_cpus = 40 - params.max_time = 48.h - params.max_memory = 128.GB + params.max_cpus = 40 + params.max_time = 48.h + params.max_memory = 128.GB } } production { process { - queue = 'production.q' - executor = 'sge' - penv = 'mpi' + queue = 'production.q' + executor = 'sge' + penv = 'mpi' process.clusterOptions = '-l excl=1' - params.max_cpus = 40 - params.max_time = 480.h - params.max_memory = 128.GB + params.max_cpus = 40 + params.max_time = 480.h + params.max_memory = 128.GB } } } +//Specific parameter for 
pipelines that can use Sentieon (e.g. nf-core/sarek, nf-core/raredisease) +process { + withLabel:'sentieon' { + container = "/apps/bio/singularities/sentieon-211204-peta.simg" + } +} From 6b9cdc8ef3c959cf7a4c2137a1b6bf80ee970a07 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Thu, 7 Jul 2022 14:24:15 +0200 Subject: [PATCH 37/91] Added info about Sentieon singularities --- docs/medair.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/medair.md b/docs/medair.md index 5eb1c46..7b0c7d7 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -64,3 +64,7 @@ For example, the following job would run with the `wgs` profile: ```bash run nextflow nf-core/raredisease -profile medair,wgs ``` + +### Sentieon + +In some pipelines (sarek, raredisease) it is possible to use Sentieon for alignment and variant calling. If ones uses the label `sentieon` for running a process, the config file contains the path to the Sentieon singularity image on Medair. From 2ab126fa4d2f235ad9dc949d2dfae6d1146cd468 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 11 Jul 2022 18:28:41 +0100 Subject: [PATCH 38/91] Update default Nextclade dataset to 2022-06-14T12:00:00Z --- conf/pipeline/viralrecon/genomes.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/pipeline/viralrecon/genomes.config b/conf/pipeline/viralrecon/genomes.config index baad834..0d5e754 100644 --- a/conf/pipeline/viralrecon/genomes.config +++ b/conf/pipeline/viralrecon/genomes.config @@ -13,18 +13,18 @@ params { // Please use 'MN908947.3' if possible because all primer sets are available / have been pre-prepared relative to that assembly fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz' gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz' - nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz' + nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz' nextclade_dataset_name = 'sars-cov-2' nextclade_dataset_reference = 'MN908947' - nextclade_dataset_tag = '2022-01-18T12:00:00Z' + nextclade_dataset_tag = '2022-06-14T12:00:00Z' } 'MN908947.3' { fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz' gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz' - nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz' + nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz' nextclade_dataset_name = 'sars-cov-2' nextclade_dataset_reference = 'MN908947' - nextclade_dataset_tag = '2022-01-18T12:00:00Z' + nextclade_dataset_tag = '2022-06-14T12:00:00Z' primer_sets { artic { '1' { From 5d7789792fee4ea42b04e71c7961ee3cd3343c6e Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Tue, 12 Jul 2022 09:57:20 +0200 Subject: [PATCH 39/91] Add CITATION.cff and update .editorconfig --- .editorconfig | 2 +- CITATION.cff | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create 
mode 100644 CITATION.cff diff --git a/.editorconfig b/.editorconfig index 835c83d..70c7a9a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,5 +8,5 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml}] +[*.{md,yml,yaml,cff}] indent_size = 2 diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..4533e2f --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." +version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +prefered-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: nature biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." 
+ issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x From f40825ac142f3c13a8b1b01605193100f752a49d Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 14 Jul 2022 15:50:50 +0200 Subject: [PATCH 40/91] lower submission rate for vsc_ugent --- conf/vsc_ugent.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index e1e9418..0bc6ffd 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -9,7 +9,7 @@ workDir = "$scratch_dir/work" // Reduce the job submit rate to about 5 per second, this way the server won't be bombarded with jobs executor { - submitRateLimit = '5 sec' + submitRateLimit = '3 sec' } // Specify that singularity should be used and where the cache dir will be for the images From 58508c4fbc27bd71c82a16f6ec2bee277bc53aea Mon Sep 17 00:00:00 2001 From: Priyanka Surana Date: Wed, 20 Jul 2022 13:14:42 +0100 Subject: [PATCH 41/91] Sanger profile update --- conf/sanger.config | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/conf/sanger.config b/conf/sanger.config index 1955ca9..df2dee7 100644 --- a/conf/sanger.config +++ b/conf/sanger.config @@ -1,35 +1,33 @@ +// Profile details params { - config_profile_description = 'The Wellcome Sanger Institute HPC cluster profile' - config_profile_contact = 'Anthony Underwood (@aunderwo)' - config_profile_url = 'https://www.sanger.ac.uk/group/informatics-support-group/' -} - -singularity { - enabled = true - cacheDir = "${baseDir}/singularity" - runOptions = '--bind /lustre --bind /nfs/pathnfs01 --bind /nfs/pathnfs02 --bind /nfs/pathnfs03 --bind /nfs/pathnfs04 --bind /nfs/pathnfs05 --bind /nfs/pathnfs06 --no-home' + config_profile_description = 'The Wellcome Sanger Institute HPC cluster (farm5) profile' + config_profile_contact = 'Priyanka Surana (@priyanka-surana)' + config_profile_url = 'https://www.sanger.ac.uk' } +// Queue and retry strategy process{ - executor = 'lsf' - queue = 'normal' - errorStrategy = { task.attempt <= 5 ? "retry" : "finish" } - process.maxRetries = 5 - withLabel:process_long { - queue = 'long' - } + executor = 'lsf' + queue = { task.time < 12.h ? 'normal' : task.time < 48.h ? 'long' : 'basement' } + errorStrategy = 'retry' + maxRetries = 5 } +// Executor details executor{ - name = 'lsf' - perJobMemLimit = true - poolSize = 4 - submitRateLimit = '5 sec' - killBatchSize = 50 + name = 'lsf' + perJobMemLimit = true + poolSize = 4 + submitRateLimit = '5 sec' + killBatchSize = 50 } +// Max resources params { - max_memory = 128.GB - max_cpus = 64 - max_time = 48.h + max_memory = 683.GB + max_cpus = 256 + max_time = 720.h } + +// For singularity +singularity.runOptions = '--bind /lustre --bind /nfs' From fd87fbcb9dadd51ed8dff1804f465132f1cc72e3 Mon Sep 17 00:00:00 2001 From: Priyanka Surana Date: Wed, 20 Jul 2022 13:22:51 +0100 Subject: [PATCH 42/91] Sanger profile update --- docs/sanger.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sanger.md b/docs/sanger.md index ee75755..3fa7ae3 100644 --- a/docs/sanger.md +++ b/docs/sanger.md @@ -14,7 +14,7 @@ The latest version of Nextflow is not installed by default on the cluster. You w A recommended place to move the `nextflow` executable to is `~/bin` so that it's in the `PATH`. Nextflow manages each process as a separate job that is submitted to the cluster by using the `bsub` command. 
-Since the Nextflow pipeline will submit individual jobs for each process to the cluster and dependencies will be provided bu Singularity images you shoudl make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file +Since the Nextflow pipeline will submit individual jobs for each process to the cluster and dependencies will be provided by Singularity images you should make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file ```bash [[ -f /software/pathogen/farm5 ]] && module load ISG/singularity @@ -26,7 +26,7 @@ To do so make a shell script with a similar structure to the following code and ```bash #!/bin/bash #BSUB -o /path/to/a/log/dir/%J.o -#BSUB -e /path/to/a/log/dir//%J.e +#BSUB -e /path/to/a/log/dir/%J.e #BSUB -M 8000 #BSUB -q long #BSUB -n 4 From 2aad355fbfbb694a88727eaf0884d76ee40ffce2 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 20 Jul 2022 14:32:43 +0100 Subject: [PATCH 43/91] The config doesn't mandate Singularity any more (Conda is fine too) --- docs/sanger.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sanger.md b/docs/sanger.md index 3fa7ae3..f4c490c 100644 --- a/docs/sanger.md +++ b/docs/sanger.md @@ -2,8 +2,6 @@ To use, run the pipeline with `-profile sanger`. This will download and launch the [`sanger.config`](../conf/sanger.config) which has been pre-configured with a setup suitable for the Wellcome Sanger Institute LSF cluster. -Using this profile, either a docker image containing all of the required software will be downloaded, and converted to a Singularity image or -a Singularity image downloaded directly before execution of the pipeline. ## Running the workflow on the Wellcome Sanger Institute cluster @@ -14,7 +12,9 @@ The latest version of Nextflow is not installed by default on the cluster. You w A recommended place to move the `nextflow` executable to is `~/bin` so that it's in the `PATH`. Nextflow manages each process as a separate job that is submitted to the cluster by using the `bsub` command. -Since the Nextflow pipeline will submit individual jobs for each process to the cluster and dependencies will be provided by Singularity images you should make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file + +If asking Nextflow to use Singularity to run the individual jobs, +you should make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file ```bash [[ -f /software/pathogen/farm5 ]] && module load ISG/singularity From 1f1a9ac5f0fe13401d857adc5472bb7d8da21975 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 20 Jul 2022 14:34:25 +0100 Subject: [PATCH 44/91] ISG provide the module, not the pathogen informatics team --- docs/sanger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sanger.md b/docs/sanger.md index f4c490c..a44acf3 100644 --- a/docs/sanger.md +++ b/docs/sanger.md @@ -17,7 +17,7 @@ If asking Nextflow to use Singularity to run the individual jobs, you should make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file ```bash -[[ -f /software/pathogen/farm5 ]] && module load ISG/singularity +[[ -f /software/modules/ISG/singularity ]] && module load ISG/singularity ``` Nextflow shouldn't run directly on the submission node but on a compute node. 
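As a point of reference for the Sanger documentation patched above: the wrapper script it describes is submitted to LSF with `bsub`, roughly as sketched below. The script name, log handling and the use of `bpeek` are illustrative assumptions, not part of these patches.

```bash
# Submit the Nextflow driver script; the #BSUB directives inside the file are honoured
# (the filename run_nextflow.sh is illustrative)
bsub < run_nextflow.sh

# Follow the driver job and the per-process jobs Nextflow submits on its behalf
bjobs -w

# Optionally stream the output of a running job (set JOB_ID from the bjobs listing)
bpeek "$JOB_ID"
```
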
From 60dc451569971795c7f20b4c8ad7bc6517b53b7b Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 20 Jul 2022 14:34:59 +0100 Subject: [PATCH 45/91] The oversubscribed queue is recommended for workflow managers --- docs/sanger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sanger.md b/docs/sanger.md index a44acf3..caf74bb 100644 --- a/docs/sanger.md +++ b/docs/sanger.md @@ -28,7 +28,7 @@ To do so make a shell script with a similar structure to the following code and #BSUB -o /path/to/a/log/dir/%J.o #BSUB -e /path/to/a/log/dir/%J.e #BSUB -M 8000 -#BSUB -q long +#BSUB -q oversubscribed #BSUB -n 4 export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128' From 1ee20f0bb0d018f341067a27c9e05b66ae1b6ce4 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 20 Jul 2022 14:35:22 +0100 Subject: [PATCH 46/91] 2 cores should be enough for the nextflow manager itself --- docs/sanger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sanger.md b/docs/sanger.md index caf74bb..a3b590f 100644 --- a/docs/sanger.md +++ b/docs/sanger.md @@ -29,7 +29,7 @@ To do so make a shell script with a similar structure to the following code and #BSUB -e /path/to/a/log/dir/%J.e #BSUB -M 8000 #BSUB -q oversubscribed -#BSUB -n 4 +#BSUB -n 2 export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128' export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128' From dda1d810b1a772ea3947561a69de59299a677728 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 20 Jul 2022 14:35:39 +0100 Subject: [PATCH 47/91] Latest stable version (rather than an edge one) --- docs/sanger.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sanger.md b/docs/sanger.md index a3b590f..ac6df4d 100644 --- a/docs/sanger.md +++ b/docs/sanger.md @@ -35,7 +35,7 @@ export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128' export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128' export NXF_ANSI_LOG=false export NXF_OPTS="-Xms8G -Xmx8G -Dnxf.pool.maxThreads=2000" -export NXF_VER=21.04.0-edge +export NXF_VER=22.04.0-5697 nextflow run \ From 0b47849532ec03528fef5d551804a5fb330d4e8f Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 27 Jul 2022 14:23:29 +0200 Subject: [PATCH 48/91] Update mpcdf.config --- conf/mpcdf.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/mpcdf.config b/conf/mpcdf.config index ee33913..93e2924 100644 --- a/conf/mpcdf.config +++ b/conf/mpcdf.config @@ -61,7 +61,7 @@ profiles { params { config_profile_description = 'MPCDF raven profile (unofficially) provided by nf-core/configs.' - memory = 2000000.MB + max_memory = 2000000.MB max_cpus = 72 max_time = 24.h } From 648025c1ceea79460eb1f064d72d49d25f3ee942 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Mon, 8 Aug 2022 16:04:21 +0200 Subject: [PATCH 49/91] Clarified where to store singularity images --- docs/medair.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/medair.md b/docs/medair.md index 7b0c7d7..48ad557 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -24,9 +24,9 @@ source activate nf-core ### Storage of Singularity images -When downloading a nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. A central location for these images is: `/apps/bio/dependencies/nf-core/singularities`. +When downloading a nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. 
We chose to have a central location for these images on medair: `/apps/bio/dependencies/nf-core/singularities`. -If you run or add the following to your `.bashrc`, Nexflow will know where to store the images: +For Nexflow to know where to store new images, run or add the following to your `.bashrc`: ```bash export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities" From 2740b5efbfc62aed864c027396fd33cbc075d07a Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Mon, 8 Aug 2022 16:08:05 +0200 Subject: [PATCH 50/91] Update docs/medair.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed typo Co-authored-by: Matthias Hörtenhuber --- docs/medair.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/medair.md b/docs/medair.md index 48ad557..bd13e84 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -57,7 +57,7 @@ module load singularity ### Choose a profile -Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 times, versus 2 days for the `wgs` profile). +Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 days, versus 2 days for the `wgs` profile). For example, the following job would run with the `wgs` profile: From ffefefe0ab95dec7e7ba45fbe7fb777972b276f6 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Mon, 8 Aug 2022 16:11:55 +0200 Subject: [PATCH 51/91] Added contact emails --- conf/medair.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/medair.config b/conf/medair.config index eeda705..d147643 100644 --- a/conf/medair.config +++ b/conf/medair.config @@ -1,7 +1,7 @@ //Profile config names for nf-core/configs params { config_profile_description = 'Cluster profile for medair (local cluster of Clinical Genomics Gothenburg)' - config_profile_contact = 'Clinical Genomics, Gothenburg' + config_profile_contact = 'Clinical Genomics, Gothenburg (cgg-rd@gu.se, cgg-it@gu.se)' config_profile_url = 'https://www.scilifelab.se/units/clinical-genomics-goteborg/' } From 0e4f2b40d3f0e2873a34569dc440858f871241e1 Mon Sep 17 00:00:00 2001 From: Gwenna Breton Date: Mon, 8 Aug 2022 16:14:17 +0200 Subject: [PATCH 52/91] Added email of IT group --- docs/medair.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/medair.md b/docs/medair.md index bd13e84..706332f 100644 --- a/docs/medair.md +++ b/docs/medair.md @@ -42,7 +42,7 @@ Use the `nf-core download --singularity-cache-only` command to start a download. ## Run nf-core pipelines -Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands below will have to be executed on one of the login nodes. If in doubt contact cgg-it. +Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands below will have to be executed on one of the login nodes. If in doubt contact cgg-it (cgg-it[at]gu.se). 
### Set-up: load Nextflow and Singularity From 0bfedde296033992860b9b4c020d490e738434e6 Mon Sep 17 00:00:00 2001 From: SPearce Date: Fri, 12 Aug 2022 15:43:22 +0100 Subject: [PATCH 53/91] Initial attempt at a CRUKMI config file --- .github/workflows/main.yml | 1 + README.md | 1 + conf/crukmi.config | 48 ++++++++++++++++++++++++++++++++++++++ docs/crukmi.md | 9 +++++++ nfcore_custom.config | 1 + 5 files changed, 60 insertions(+) create mode 100644 conf/crukmi.config create mode 100644 docs/crukmi.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f5ba0d1..59d038b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -49,6 +49,7 @@ jobs: - "cheaha" - "computerome" - "crick" + - "crukmi" - "denbi_qbic" - "ebc" - "eddie" diff --git a/README.md b/README.md index 94ce55c..918ab0c 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ Currently documentation is available for the following systems: - [CHEAHA](docs/cheaha.md) - [Computerome](docs/computerome.md) - [CRICK](docs/crick.md) +- [Cancer Research UK Manchester Institute](docs/crukmi.md) - [CZBIOHUB_AWS](docs/czbiohub.md) - [DENBI_QBIC](docs/denbi_qbic.md) - [EBC](docs/ebc.md) diff --git a/conf/crukmi.config b/conf/crukmi.config new file mode 100644 index 0000000..3e7383b --- /dev/null +++ b/conf/crukmi.config @@ -0,0 +1,48 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Cancer Research UK Manchester Institute HPC cluster profile provided by nf-core/configs' + config_profile_contact = 'Stephen Kitcatt, Simon Pearce (@skitcattCRUKMI, @sppearce)' + config_profile_url = 'http://scicom.picr.man.ac.uk/projects/user-support/wiki' +} + +env { + SINGULARITY_CACHEDIR = '/lmod/nextflow_software' +} + +singularity { + enabled = true + autoMounts = true +} + +process { + beforeScript = 'module load apps/singularity/3.8.0' + executor = 'pbs' + + withLabel:process_low { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 5.GB * task.attempt, 'memory' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 20.GB * task.attempt, 'memory' ) } + } + + withLabel:process_high { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + } + + withName: 'SAMTOOLS_MPILEUP' { + cpus = 1 + memory = { 8.GB * task.attempt } + } + + +} + +params { + max_memory = 2000.GB + max_cpus = 32 + max_time = 72.h +} diff --git a/docs/crukmi.md b/docs/crukmi.md new file mode 100644 index 0000000..a55a9fa --- /dev/null +++ b/docs/crukmi.md @@ -0,0 +1,9 @@ +# nf-core/configs: BI Configuration + +All nf-core pipelines have been successfully configured for use at Boehringer Ingelheim. + +To use, run the pipeline with `-profile bi`. This will download and launch the [`bi.config`](../conf/bi.config) which has been pre-configured with a setup suitable for the BI systems. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +Before running the pipeline you will need to follow the internal documentation to run Nextflow on our systems. Similar to that, you need to set an environment variable `NXF_GLOBAL_CONFIG` to the path of the internal global config which is not publicly available here. 
+ +> NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. diff --git a/nfcore_custom.config b/nfcore_custom.config index 6f0ac6c..42b7f66 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -30,6 +30,7 @@ profiles { cheaha { includeConfig "${params.custom_config_base}/conf/cheaha.config" } computerome { includeConfig "${params.custom_config_base}/conf/computerome.config" } crick { includeConfig "${params.custom_config_base}/conf/crick.config" } + crukmi { includeConfig "${params.custom_config_base}/conf/crukmi.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } denbi_qbic { includeConfig "${params.custom_config_base}/conf/denbi_qbic.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } From a97b887d9362f6b63634ea8f3c3e4b5fb9c38eaf Mon Sep 17 00:00:00 2001 From: SPearce Date: Fri, 12 Aug 2022 16:57:04 +0100 Subject: [PATCH 54/91] Written a help file --- docs/crukmi.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/crukmi.md b/docs/crukmi.md index a55a9fa..609a840 100644 --- a/docs/crukmi.md +++ b/docs/crukmi.md @@ -1,9 +1,15 @@ -# nf-core/configs: BI Configuration +# nf-core/configs: Cancer Research UK Manchester Institute Configuration -All nf-core pipelines have been successfully configured for use at Boehringer Ingelheim. +All nf-core pipelines have been successfully configured for the use on the HPC (phoenix) at Cancer Research UK Manchester Institute. -To use, run the pipeline with `-profile bi`. This will download and launch the [`bi.config`](../conf/bi.config) which has been pre-configured with a setup suitable for the BI systems. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. +To use, run the pipeline with `-profile crukmi`. This will download and launch the [`crukmi.config`](../conf/crukmi.config) which has been pre-configured with a setup suitable for the phoenix HPC. Using this profile, singularity images will be downloaded to run on the cluster. -Before running the pipeline you will need to follow the internal documentation to run Nextflow on our systems. Similar to that, you need to set an environment variable `NXF_GLOBAL_CONFIG` to the path of the internal global config which is not publicly available here. +Before running the pipeline you will need to load Nextflow using the environment module system, for example via: -> NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. +```bash +## Load Nextflow and Singularity environment modules +module purge +module load apps/nextflow/22.04.5 +``` + +The pipeline should always be executed inside a workspace on the `/scratch/` system. All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory. 
\ No newline at end of file From d0662a3acbd104f98e216ea9eeba438e1122a75d Mon Sep 17 00:00:00 2001 From: SPearce Date: Mon, 15 Aug 2022 09:33:30 +0100 Subject: [PATCH 55/91] Ran prettier --- docs/crukmi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/crukmi.md b/docs/crukmi.md index 609a840..91dff58 100644 --- a/docs/crukmi.md +++ b/docs/crukmi.md @@ -12,4 +12,4 @@ module purge module load apps/nextflow/22.04.5 ``` -The pipeline should always be executed inside a workspace on the `/scratch/` system. All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory. \ No newline at end of file +The pipeline should always be executed inside a workspace on the `/scratch/` system. All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory. From 8ae3cba5e3e5466f6d9f42eadaf62b0911c821e9 Mon Sep 17 00:00:00 2001 From: SPearce Date: Mon, 15 Aug 2022 11:35:59 +0100 Subject: [PATCH 56/91] Reduce MPILEUP to 5GB --- conf/crukmi.config | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/conf/crukmi.config b/conf/crukmi.config index 3e7383b..f73c252 100644 --- a/conf/crukmi.config +++ b/conf/crukmi.config @@ -18,6 +18,10 @@ process { beforeScript = 'module load apps/singularity/3.8.0' executor = 'pbs' + errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} + maxErrors = '-1' + maxRetries = 3 + withLabel:process_low { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 5.GB * task.attempt, 'memory' ) } @@ -35,7 +39,7 @@ process { withName: 'SAMTOOLS_MPILEUP' { cpus = 1 - memory = { 8.GB * task.attempt } + memory = { 5.GB * task.attempt } } From 1c942759ea9e30822181fb9d854744f102c62e18 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 18 Aug 2022 15:52:31 +0200 Subject: [PATCH 57/91] update sentieon --- conf/pipeline/raredisease/hasta.config | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index fb4d94a..1bec159 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -8,7 +8,14 @@ process { memory = { check_max( 80.GB * task.attempt, 'memory' ) } } withLabel:'sentieon' { - beforeScript = { "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202010.02/bin" } + beforeScript = { "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202112.02/bin" } + } + // Java memory fixes + withName:'QUALIMAP_BAMQC' { + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? "--mem ${task.memory.mega * 1.15 as long}M" : ''}" } + } + withName:'PICARD_MARKDUPLICATES' { + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? 
"--mem ${task.memory.mega * 1.15 as long}M" : ''}" } } withName: BCFTOOLS_VIEW { if (params.genome == 'GRCh37') { From dd63dee7aa6422354311dcf4a55945ad18d01f1d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 18 Aug 2022 15:56:41 +0200 Subject: [PATCH 58/91] update conf --- conf/pipeline/raredisease/hasta.config | 7 ------- 1 file changed, 7 deletions(-) diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index 1bec159..d655bf7 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -10,13 +10,6 @@ process { withLabel:'sentieon' { beforeScript = { "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202112.02/bin" } } - // Java memory fixes - withName:'QUALIMAP_BAMQC' { - clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? "--mem ${task.memory.mega * 1.15 as long}M" : ''}" } - } - withName:'PICARD_MARKDUPLICATES' { - clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? "--mem ${task.memory.mega * 1.15 as long}M" : ''}" } - } withName: BCFTOOLS_VIEW { if (params.genome == 'GRCh37') { ext.args = '--output-type z --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "' From 8d5ae4c3ab53c8892bce378c58fb0e881c2e6cd0 Mon Sep 17 00:00:00 2001 From: Austyn Trull Date: Tue, 23 Aug 2022 15:30:13 -0500 Subject: [PATCH 59/91] Committing changes made by Brandon Wilk so that singularity containers will use scratch space --- conf/cheaha.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/cheaha.config b/conf/cheaha.config index 323a625..578259c 100644 --- a/conf/cheaha.config +++ b/conf/cheaha.config @@ -5,9 +5,15 @@ params { config_profile_url = 'https://www.uab.edu/cores/ircp/bds' } +env { + TMPDIR="$USER_SCRATCH" + SINGULARITY_TMPDIR="$USER_SCRATCH" +} + singularity { enabled = true autoMounts = true + runOptions = "--contain --workdir $USER_SCRATCH" } process { From 8dcfcfd856f1e657752911ce4771fdf3d9fa2467 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 24 Aug 2022 12:13:17 +0100 Subject: [PATCH 60/91] Add monkeypox genome files to viralrecon genome config file --- conf/pipeline/viralrecon/genomes.config | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/conf/pipeline/viralrecon/genomes.config b/conf/pipeline/viralrecon/genomes.config index 0d5e754..ce2cf72 100644 --- a/conf/pipeline/viralrecon/genomes.config +++ b/conf/pipeline/viralrecon/genomes.config @@ -8,6 +8,8 @@ params { // Genome reference file paths genomes { + + // SARS-CoV-2 'NC_045512.2' { // This version of the reference has been kept here for backwards compatibility. 
// Please use 'MN908947.3' if possible because all primer sets are available / have been pre-prepared relative to that assembly @@ -18,6 +20,8 @@ params { nextclade_dataset_reference = 'MN908947' nextclade_dataset_tag = '2022-06-14T12:00:00Z' } + + // SARS-CoV-2 'MN908947.3' { fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz' gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz' @@ -66,5 +70,24 @@ params { } } } + + // Monkeypox + 'NC_063383.1' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCA_014621545.1_ASM1462154v1_genomic.220824.fna.gz' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCA_014621545.1_ASM1462154v1_genomic.220824.gff.gz' + } + + // Monkeypox + 'ON563414.3' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/ON563414.3/GCA_023516015.3_ASM2351601v1_genomic.220824.fna.gz' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/ON563414.3/GCA_023516015.3_ASM2351601v1_genomic.220824.gff.gz' + } + + // Monkeypox + 'MT903344.1' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MT903344.1/GCA_014621585.1_ASM1462158v1_genomic.220824.fna.gz' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MT903344.1/GCA_014621585.1_ASM1462158v1_genomic.220824.gff.gz' + } + } } From 8b9661825b9e1d99083bc1331eccec47967e05f1 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 24 Aug 2022 15:21:44 +0100 Subject: [PATCH 61/91] Replace NC_063383.1 fasta and gff and add Nextclade config --- conf/pipeline/viralrecon/genomes.config | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/viralrecon/genomes.config b/conf/pipeline/viralrecon/genomes.config index ce2cf72..06c8577 100644 --- a/conf/pipeline/viralrecon/genomes.config +++ b/conf/pipeline/viralrecon/genomes.config @@ -73,8 +73,12 @@ params { // Monkeypox 'NC_063383.1' { - fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCA_014621545.1_ASM1462154v1_genomic.220824.fna.gz' - gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCA_014621545.1_ASM1462154v1_genomic.220824.gff.gz' + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCF_014621545.1_ASM1462154v1_genomic.220824.fna.gz' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCF_014621545.1_ASM1462154v1_genomic.220824.gff.gz' + nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/nextclade_hMPXV_NC_063383.1_2022-08-19T12_00_00Z.tar.gz' + nextclade_dataset_name = 'hMPXV' + nextclade_dataset_reference = 'NC_063383.1' + nextclade_dataset_tag = '2022-08-19T12:00:00Z' } // Monkeypox From 93c4ba67a01122d702c5b73d3d8d1c7c3a54de56 Mon Sep 17 00:00:00 2001 From: kalayaneech Date: Thu, 25 Aug 2022 16:17:09 +0700 Subject: [PATCH 62/91] adding adcra configuration --- conf/adcra.config | 40 ++++++++++++++++++++++++++++++++++++++++ docs/adcra.md | 29 +++++++++++++++++++++++++++++ nfcore_custom.config | 1 + 3 files changed, 70 insertions(+) create mode 100644 conf/adcra.config create mode 100644 docs/adcra.md diff --git a/conf/adcra.config b/conf/adcra.config new file mode 100644 index 0000000..0654c5a --- /dev/null +++ b/conf/adcra.config @@ -0,0 +1,40 @@ +/* +* 
-------------------------------------------------------------- +* nf-core pipelines config file for AD project using CRA HPC +* -------------------------------------------------------------- +*/ + +params { + config_profile_name = 'adcra' + config_profile_description = 'CRA HPC profile provided by nf-core/configs' + config_profile_contact = 'Kalayanee Chairat (@kalayaneech)' + config_profile_url = 'https://bioinformatics.kmutt.ac.th/' + } + +params { + max_cpus = 16 + max_memory = 128.GB + max_time = 120.h +} + +// Specify the job scheduler +executor { + name = 'slurm' + queueSize = 20 + submitRateLimit = '6/1min' +} + +Singularity { + enabled = true + autoMounts = true +} + +process { + scratch = true + queue = 'unlimit' + queueStatInterval = '10 min' + maxRetries = 3 + errorStrategy = { task.attempt <=3 ? 'retry' : 'finish' } + cache = 'lenient' + exitStatusReadTimeoutMillis = '2700000' +} diff --git a/docs/adcra.md b/docs/adcra.md new file mode 100644 index 0000000..5e9d058 --- /dev/null +++ b/docs/adcra.md @@ -0,0 +1,29 @@ +# nf-core/configs: CRA HPC Configuration +nfcore pipeline sarek and rnaseq have been tested on the CRA HPC. + +## Before running the pipeline +- You will need an account to use the CRA HPC cluster in order to run the pipeline. +- Make sure that Singularity and Nextflow are installed. +- Downlode pipeline singularity images to a HPC system using [nf-core tools](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) +``` +$ conda install nf-core +$ nf-core download +``` +- You will need to specify a Singularity cache directory in your ~./bashrc. This will store your container images in this cache directory without repeatedly downloading them every time you run a pipeline. Since space on home directory is limited, using lustre file system is recommended. +``` +export NXF_SINGULARITY_CACHEDIR = "/lustre/fs0/storage/yourCRAAccount/cache_dir" +``` +- Download iGenome reference to be used as a local copy. +``` +$ aws s3 --no-sign-request --region eu-west-1 sync s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/ /lustre/fs0/storage/yourCRAAccount/references/Homo_sapiens/GATK/GRCh38/ +``` +## Running the pipeline using the adcra config profile +- Run the pipeline within a [screen](https://linuxize.com/post/how-to-use-linux-screen/) or [tmux](https://linuxize.com/post/getting-started-with-tmux/) session. +- Specify the config profile with ```-profile adcra```. +- Using lustre file systems to store results (```--outdir```) and intermediate files (```-work-dir```) is recommended. +``` +nextflow run /path/to/nf-core/ -profile adcra \ +--genome GRCh38 \ +--igenomes_base /path/to/genome_references/ \ +... # the rest of pipeline flags +``` diff --git a/nfcore_custom.config b/nfcore_custom.config index 6f0ac6c..0083bc5 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -11,6 +11,7 @@ //Please use a new line per include Config section to allow easier linting/parsing. Thank you. 
profiles { abims { includeConfig "${params.custom_config_base}/conf/abims.config" } + adcra { includeConfig "${params.custom_config_base}/conf/adcra.config" } alice { includeConfig "${params.custom_config_base}/conf/alice.config" } aws_tower { includeConfig "${params.custom_config_base}/conf/aws_tower.config" } awsbatch { includeConfig "${params.custom_config_base}/conf/awsbatch.config" } From 317e5a16cb8c769115b0c7554c060d638426b134 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Thu, 25 Aug 2022 16:24:52 -0700 Subject: [PATCH 63/91] Improve AWS-related config for Sage profile --- conf/sage.config | 112 +++++++++++++++++++++++++++++------------------ docs/sage.md | 7 ++- 2 files changed, 74 insertions(+), 45 deletions(-) diff --git a/conf/sage.config b/conf/sage.config index e5bfa8b..7477d83 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -1,62 +1,88 @@ +// Config profile metadata params { config_profile_description = 'The Sage Bionetworks profile' config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' } +// Leverage us-east-1 mirror of select human and mouse genomes +params { + igenomes_base = 's3://sage-igenomes/igenomes' +} + +// Enable retries globally for certain exit codes process { - - cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 24.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' } maxRetries = 5 maxErrors = '-1' - - // Process-specific resource requirements - withLabel:process_low { - cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 24.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 48.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 96.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 192.h * task.attempt, 'time' ) } - } - withLabel:process_high_memory { - memory = { check_max( 128.GB * task.attempt, 'memory' ) } - } - - // Preventing Sarek labels from using the actual maximums - withLabel:memory_max { - memory = { check_max( 128.GB * task.attempt, 'memory' ) } - } - withLabel:cpus_max { - cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } - } - } +// Increase time limit to allow file transfers to finish +// The default is 12 hours, which results in timeouts +threadPool.FileTransfer.maxAwait = '24 hour' + +// Configure Nextflow to be more reliable on AWS aws { region = "us-east-1" + client { + uploadChunkSize = 209715200 + } +} +executor { + name = 'awsbatch' + // Ensure unlimited queue size on AWS Batch + queueSize = 100000 + // Slow down the rate at which AWS Batch jobs accumulate in + // the queue (an attempt to prevent orphaned EBS volumes) + submitRateLimit = '5 / 1 sec' } -params { - igenomes_base = 's3://sage-igenomes/igenomes' - max_memory = 500.GB - max_cpus = 64 - max_time = 168.h // One week -} +// Disabling resource allocation tweaks for now +// +// params { +// max_memory = 500.GB +// max_cpus = 64 +// max_time = 168.h // One week +// } +// +// process { +// +// cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) 
} +// memory = { check_max( 6.GB * task.attempt, 'memory' ) } +// time = { check_max( 24.h * task.attempt, 'time' ) } +// +// // Process-specific resource requirements +// withLabel:process_low { +// cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) } +// memory = { check_max( 12.GB * task.attempt, 'memory' ) } +// time = { check_max( 24.h * task.attempt, 'time' ) } +// } +// withLabel:process_medium { +// cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) } +// memory = { check_max( 36.GB * task.attempt, 'memory' ) } +// time = { check_max( 48.h * task.attempt, 'time' ) } +// } +// withLabel:process_high { +// cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } +// memory = { check_max( 72.GB * task.attempt, 'memory' ) } +// time = { check_max( 96.h * task.attempt, 'time' ) } +// } +// withLabel:process_long { +// time = { check_max( 192.h * task.attempt, 'time' ) } +// } +// withLabel:process_high_memory { +// memory = { check_max( 128.GB * task.attempt, 'memory' ) } +// } +// +// // Preventing Sarek labels from using the actual maximums +// withLabel:memory_max { +// memory = { check_max( 128.GB * task.attempt, 'memory' ) } +// } +// withLabel:cpus_max { +// cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } +// } +// +// } // Function to slow the increase of the resource multipler // as attempts are made. The rationale is that some CPUs diff --git a/docs/sage.md b/docs/sage.md index 133ccec..d503b42 100644 --- a/docs/sage.md +++ b/docs/sage.md @@ -5,11 +5,14 @@ To use this custom configuration, run the pipeline with `-profile sage`. This wi This global configuration includes the following tweaks: - Update the default value for `igenomes_base` to `s3://sage-igenomes` -- Increase the default time limits because we run pipelines on AWS - Enable retries by default when exit codes relate to insufficient memory - Allow pending jobs to finish if the number of retries are exhausted -- Slow the increase in the number of allocated CPU cores on retries +- Increase the amount of time allowed for file transfers +- Increase the default chunk size for multipart uploads to S3 +- Slow down job submission rate to avoid overwhelming any APIs - Define the `check_max()` function, which is missing in Sarek v2 +- (Disabled temporarily) Slow the increase in the number of allocated CPU cores on retries +- (Disabled temporarily) Increase the default time limits because we run pipelines on AWS ## Additional information about iGenomes From 90f638d3761a6b8fa6560e1a3d09a6a62a6ee44a Mon Sep 17 00:00:00 2001 From: Austyn Trull Date: Mon, 29 Aug 2022 10:47:46 -0500 Subject: [PATCH 64/91] Updating the cheaha documentation to clarify that /data/temporary-scratch/atrull will be used as storage for files that would normally go into one of the temporary locations when using Singularity --- docs/cheaha.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/cheaha.md b/docs/cheaha.md index 9d871c3..fb04c88 100644 --- a/docs/cheaha.md +++ b/docs/cheaha.md @@ -13,6 +13,8 @@ module load Singularity module load Nextflow ``` +Various tasks will be run inside of Singularity containers and all temp files typically written to `/tmp` and `/var/tmp` are instead written to the path pointed to by the `USER_SCRATCH` environment variable. This means that these temp files are stored in a user specific location, making them inaccessible to other users for pipeline reruns. Some of these temp files can be large and cleanup is also the responsibility of the user. 
+ All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory anyway. > NB: You will need an account to use the HPC cluster on Cheaha in order to run the pipeline. If in doubt contact UAB IT Research Computing.
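Since the Cheaha profile above points Singularity temp directories at `$USER_SCRATCH` and leaves cleanup to the user, a minimal housekeeping sketch might look like the following; the seven-day threshold and the `tmp*` name pattern are assumptions to adjust, not something these patches define.

```bash
# See how much space temporary files are using in your scratch area
du -sh "$USER_SCRATCH"

# List scratch temp directories older than 7 days, then remove them once reviewed
# (adjust the -name pattern and -mtime age to whatever your runs actually leave behind)
find "$USER_SCRATCH" -maxdepth 1 -mindepth 1 -name 'tmp*' -mtime +7 -print
find "$USER_SCRATCH" -maxdepth 1 -mindepth 1 -name 'tmp*' -mtime +7 -exec rm -rf {} +
```
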

From 51871ea6fca317ed6fca37597cabea895d62cbba Mon Sep 17 00:00:00 2001
From: Annick Renevey <47788523+rannick@users.noreply.github.com>
Date: Wed, 31 Aug 2022 11:32:01 +0200
Subject: [PATCH 65/91] rnafusion setup for hasta usage

---
 conf/pipeline/rnafusion/hasta.config | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 conf/pipeline/rnafusion/hasta.config

diff --git a/conf/pipeline/rnafusion/hasta.config b/conf/pipeline/rnafusion/hasta.config
new file mode 100644
index 0000000..fdd7475
--- /dev/null
+++ b/conf/pipeline/rnafusion/hasta.config
@@ -0,0 +1,7 @@
+// rnafusion/hasta specific profile config for Clinical Genomics Stockholm usage
+
+params {
+    all = true
+    trim = true
+    fusioninspector_filter = true
+}

From 0851903f66068d4e05b43f6a296134098063a5a1 Mon Sep 17 00:00:00 2001
From: Annick Renevey <47788523+rannick@users.noreply.github.com>
Date: Wed, 31 Aug 2022 14:19:07 +0200
Subject: [PATCH 66/91] include new config in pipeline configs file

---
 README.md                        |  1 +
 docs/pipeline/rnafusion/hasta.md | 19 +++++++++++++++++++
 pipeline/rnafusion.config        |  4 +++-
 3 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 docs/pipeline/rnafusion/hasta.md

diff --git a/README.md b/README.md
index 94ce55c..ae45869 100644
--- a/README.md
+++ b/README.md
@@ -198,6 +198,7 @@ Currently documentation is available for the following pipelines within specific
 - mag
   - [EVA](docs/pipeline/mag/eva.md)
 - rnafusion
+  - [HASTA](docs/pipeline/rnafusion/hasta.md)
   - [MUNIN](docs/pipeline/rnafusion/munin.md)
 - rnavar
   - [MUNIN](docs/pipeline/rnavar/munin.md)
diff --git a/docs/pipeline/rnafusion/hasta.md b/docs/pipeline/rnafusion/hasta.md
new file mode 100644
index 0000000..9f7813d
--- /dev/null
+++ b/docs/pipeline/rnafusion/hasta.md
@@ -0,0 +1,19 @@
+# nf-core/configs: HASTA rnafusion specific configuration
+
+Extra specific configuration for rnafusion pipeline
+
+## Usage
+
+To use, run the pipeline with `-profile hasta`.
+
+This will download and launch the rnafusion specific [`hasta.config`](../../../conf/pipeline/rnafusion/hasta.config) which has been pre-configured with a setup suitable for the `HASTA` cluster.
+
+Example: `nextflow run nf-core/rnafusion -profile hasta`
+
+## rnafusion specific configurations for HASTA
+
+Specific configurations for `HASTA` have been made for rnafusion.
+ +- Always run all the analysis steps (all = true) +- Use trimming (trim = true) +- Take the fusions identified by at least 2 fusion detection tools to the fusioninspector analysis (fusioninspector_filter = true) diff --git a/pipeline/rnafusion.config b/pipeline/rnafusion.config index 2d86d89..f5a4e47 100644 --- a/pipeline/rnafusion.config +++ b/pipeline/rnafusion.config @@ -9,5 +9,7 @@ */ profiles { + hasta { includeConfig "${params.custom_config_base}/conf/pipeline/rnafusion/hasta.config" } munin { includeConfig "${params.custom_config_base}/conf/pipeline/rnafusion/munin.config" } -} \ No newline at end of file + +} From 242101db16a342a45ef806e00ab20243327c4322 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 31 Aug 2022 14:26:32 +0200 Subject: [PATCH 67/91] fix extra space --- pipeline/rnafusion.config | 1 - 1 file changed, 1 deletion(-) diff --git a/pipeline/rnafusion.config b/pipeline/rnafusion.config index f5a4e47..894f6ab 100644 --- a/pipeline/rnafusion.config +++ b/pipeline/rnafusion.config @@ -11,5 +11,4 @@ profiles { hasta { includeConfig "${params.custom_config_base}/conf/pipeline/rnafusion/hasta.config" } munin { includeConfig "${params.custom_config_base}/conf/pipeline/rnafusion/munin.config" } - } From 1e1ca1e96b2cd3330949d10200eb352ab6470271 Mon Sep 17 00:00:00 2001 From: SPearce Date: Wed, 31 Aug 2022 15:35:57 +0100 Subject: [PATCH 68/91] Incorporating executor options --- conf/crukmi.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conf/crukmi.config b/conf/crukmi.config index f73c252..000eda9 100644 --- a/conf/crukmi.config +++ b/conf/crukmi.config @@ -42,7 +42,12 @@ process { memory = { 5.GB * task.attempt } } +} +executor { + name = 'pbs' + queueSize = 1000 + pollInterval = '10 sec' } params { From c8837235591ad4925fcefcb4f6b96036a14cd53b Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 31 Aug 2022 09:10:30 -0700 Subject: [PATCH 69/91] Simplify resource adjustments --- conf/sage.config | 83 ++++++++++++++++++++---------------------------- docs/sage.md | 4 +-- 2 files changed, 36 insertions(+), 51 deletions(-) diff --git a/conf/sage.config b/conf/sage.config index 7477d83..615da63 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -37,58 +37,43 @@ executor { submitRateLimit = '5 / 1 sec' } -// Disabling resource allocation tweaks for now -// -// params { -// max_memory = 500.GB -// max_cpus = 64 -// max_time = 168.h // One week -// } -// -// process { -// -// cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) } -// memory = { check_max( 6.GB * task.attempt, 'memory' ) } -// time = { check_max( 24.h * task.attempt, 'time' ) } -// -// // Process-specific resource requirements -// withLabel:process_low { -// cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) } -// memory = { check_max( 12.GB * task.attempt, 'memory' ) } -// time = { check_max( 24.h * task.attempt, 'time' ) } -// } -// withLabel:process_medium { -// cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) } -// memory = { check_max( 36.GB * task.attempt, 'memory' ) } -// time = { check_max( 48.h * task.attempt, 'time' ) } -// } -// withLabel:process_high { -// cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } -// memory = { check_max( 72.GB * task.attempt, 'memory' ) } -// time = { check_max( 96.h * task.attempt, 'time' ) } -// } -// withLabel:process_long { -// time = { check_max( 192.h * task.attempt, 'time' ) } -// } -// withLabel:process_high_memory { -// memory = { check_max( 128.GB * task.attempt, 'memory' ) } -// } -// 
-// // Preventing Sarek labels from using the actual maximums -// withLabel:memory_max { -// memory = { check_max( 128.GB * task.attempt, 'memory' ) } -// } -// withLabel:cpus_max { -// cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } -// } -// -// } +// Adjust default resource allocations (see `../docs/sage.md`) +process { + + cpus = { check_max( 1 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + + // Process-specific resource requirements + withLabel:process_low { + cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 96.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 192.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + } + +} // Function to slow the increase of the resource multipler -// as attempts are made. The rationale is that some CPUs -// don't need to be increased as fast as memory. +// as attempts are made. The rationale is that the number +// of CPU cores isn't a limiting factor as often as memory. def slow(attempt, factor = 2) { - return Math.ceil( attempt / factor) as int + return Math.ceil( attempt / factor) as int } diff --git a/docs/sage.md b/docs/sage.md index d503b42..1e36fed 100644 --- a/docs/sage.md +++ b/docs/sage.md @@ -11,8 +11,8 @@ This global configuration includes the following tweaks: - Increase the default chunk size for multipart uploads to S3 - Slow down job submission rate to avoid overwhelming any APIs - Define the `check_max()` function, which is missing in Sarek v2 -- (Disabled temporarily) Slow the increase in the number of allocated CPU cores on retries -- (Disabled temporarily) Increase the default time limits because we run pipelines on AWS +- Slow the increase in the number of allocated CPU cores on retries +- Increase the default time limits because we run pipelines on AWS ## Additional information about iGenomes From 179b343bd20995bd48fc57b44a6da07340995025 Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Wed, 31 Aug 2022 09:18:27 -0700 Subject: [PATCH 70/91] Incorporate resource limits --- conf/sage.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/sage.config b/conf/sage.config index 615da63..bfe1e09 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -8,6 +8,9 @@ params { // Leverage us-east-1 mirror of select human and mouse genomes params { igenomes_base = 's3://sage-igenomes/igenomes' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' } // Enable retries globally for certain exit codes From 9681716205fdd6b707ba9b0b1b680968ce17ae56 Mon Sep 17 00:00:00 2001 From: Kalayanee Chairat <92149214+kalayaneech@users.noreply.github.com> Date: Thu, 1 Sep 2022 15:11:18 +0700 Subject: [PATCH 71/91] Update adcra.config Fixing typo --- conf/adcra.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/adcra.config b/conf/adcra.config index 0654c5a..8ed7f64 100644 --- a/conf/adcra.config +++ b/conf/adcra.config @@ 
-24,7 +24,7 @@ executor { submitRateLimit = '6/1min' } -Singularity { +singularity { enabled = true autoMounts = true } From bf2ab39ea0256f37fddfd86d99a2e36e6ca5f218 Mon Sep 17 00:00:00 2001 From: Kalayanee Chairat <92149214+kalayaneech@users.noreply.github.com> Date: Thu, 1 Sep 2022 15:15:51 +0700 Subject: [PATCH 72/91] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ae45869..992bace 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ See [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs Currently documentation is available for the following systems: - [ABIMS](docs/abims.md) +- [ADCRA](docs/adcra.md) - [ALICE](docs/alice.md) - [AWSBATCH](docs/awsbatch.md) - [AWS_TOWER](docs/aws_tower.md) From 8e275182ea4ab0e9970042d3857f011ab1986a1a Mon Sep 17 00:00:00 2001 From: Kalayanee Chairat <92149214+kalayaneech@users.noreply.github.com> Date: Thu, 1 Sep 2022 15:16:56 +0700 Subject: [PATCH 73/91] Update main.yml --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f5ba0d1..eca11e6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -30,6 +30,7 @@ jobs: matrix: profile: - "abims" + - "adcra" - "alice" - "aws_tower" - "awsbatch" From 28e59e38b4e73336109decbc3ea15b99deecb218 Mon Sep 17 00:00:00 2001 From: ameynert Date: Thu, 1 Sep 2022 09:28:05 +0100 Subject: [PATCH 74/91] Run prettier --- docs/adcra.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/adcra.md b/docs/adcra.md index 5e9d058..0494b5a 100644 --- a/docs/adcra.md +++ b/docs/adcra.md @@ -1,26 +1,36 @@ # nf-core/configs: CRA HPC Configuration + nfcore pipeline sarek and rnaseq have been tested on the CRA HPC. ## Before running the pipeline + - You will need an account to use the CRA HPC cluster in order to run the pipeline. - Make sure that Singularity and Nextflow are installed. - Downlode pipeline singularity images to a HPC system using [nf-core tools](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) + ``` $ conda install nf-core $ nf-core download ``` -- You will need to specify a Singularity cache directory in your ~./bashrc. This will store your container images in this cache directory without repeatedly downloading them every time you run a pipeline. Since space on home directory is limited, using lustre file system is recommended. + +- You will need to specify a Singularity cache directory in your ~./bashrc. This will store your container images in this cache directory without repeatedly downloading them every time you run a pipeline. Since space on home directory is limited, using lustre file system is recommended. + ``` export NXF_SINGULARITY_CACHEDIR = "/lustre/fs0/storage/yourCRAAccount/cache_dir" ``` + - Download iGenome reference to be used as a local copy. + ``` $ aws s3 --no-sign-request --region eu-west-1 sync s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/ /lustre/fs0/storage/yourCRAAccount/references/Homo_sapiens/GATK/GRCh38/ ``` + ## Running the pipeline using the adcra config profile + - Run the pipeline within a [screen](https://linuxize.com/post/how-to-use-linux-screen/) or [tmux](https://linuxize.com/post/getting-started-with-tmux/) session. -- Specify the config profile with ```-profile adcra```. -- Using lustre file systems to store results (```--outdir```) and intermediate files (```-work-dir```) is recommended. 
+- Specify the config profile with `-profile adcra`. +- Using lustre file systems to store results (`--outdir`) and intermediate files (`-work-dir`) is recommended. + ``` nextflow run /path/to/nf-core/ -profile adcra \ --genome GRCh38 \ From b22e8b79c7f233918fef4a8f637376fc1d946144 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 1 Sep 2022 11:01:23 +0200 Subject: [PATCH 75/91] Fix cheaha config due to use of environmental variable --- conf/cheaha.config | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/conf/cheaha.config b/conf/cheaha.config index 578259c..58963b5 100644 --- a/conf/cheaha.config +++ b/conf/cheaha.config @@ -1,3 +1,6 @@ +// Define the Scratch directory +def scratch_dir = System.getenv("USER_SCRATCH") ?: "/tmp" + params { config_profile_name = 'cheaha' config_profile_description = 'University of Alabama at Birmingham Cheaha HPC' @@ -6,14 +9,14 @@ params { } env { - TMPDIR="$USER_SCRATCH" - SINGULARITY_TMPDIR="$USER_SCRATCH" + TMPDIR="$USER" + SINGULARITY_TMPDIR="$scratch_dir" } singularity { enabled = true autoMounts = true - runOptions = "--contain --workdir $USER_SCRATCH" + runOptions = "--contain --workdir $scratch_dir" } process { From 51f0359b3d1d41b7d560c2076f8ef2713de1dfc6 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 1 Sep 2022 11:12:15 +0200 Subject: [PATCH 76/91] Add documentation for when you need to use an environmental variable --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 992bace..ff4c6cb 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,8 @@ Before adding your config file to nf-core/configs, we highly recommend writing a N.B. In your config file, please also make sure to add an extra `params` section with `params.config_profile_description`, `params.config_profile_contact` and `params.config_profile_url` set to reasonable values. Users will get information on who wrote the configuration profile then when executing a nf-core pipeline and can report back if there are things missing for example. +N.B. If you try to specify a shell environment variable within your profile, in some cases you may get an error during testing of something like `Unknown config attribute env.USER_SCRATCH -- check config file: /home/runner/work/configs/configs/nextflow.config` (where the bash environment variable is `$USER_SCRATCH`). This is because the github runner will not have your institutional environment variables set. To fix this you can define this as an internal variable, and set a fallback value for that variable. A good example is in the [VSC_UGENT profile](`https://github.com/nf-core/configs/blob/69468e7ca769643b151a6cfd1ab24185fc341c06/conf/vsc_ugent.config#L2`). + ### Testing If you want to add a new custom config file to `nf-core/configs` please test that your pipeline of choice runs as expected by using the [`-c`](https://www.nextflow.io/docs/latest/config.html) parameter. From 897d0a188b4e2bacba1dee5ff9345d4fef040506 Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 10:32:43 +0100 Subject: [PATCH 77/91] Update nfcore_custom.config Co-authored-by: Maxime U. 
Garcia --- nfcore_custom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nfcore_custom.config b/nfcore_custom.config index fa26f03..fae764b 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -31,7 +31,7 @@ profiles { cheaha { includeConfig "${params.custom_config_base}/conf/cheaha.config" } computerome { includeConfig "${params.custom_config_base}/conf/computerome.config" } crick { includeConfig "${params.custom_config_base}/conf/crick.config" } - crukmi { includeConfig "${params.custom_config_base}/conf/crukmi.config" } + crukmi { includeConfig "${params.custom_config_base}/conf/crukmi.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } denbi_qbic { includeConfig "${params.custom_config_base}/conf/denbi_qbic.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } From ba1d3f24cc2dfff418ab9d88920ca753c99aa9aa Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:07:14 +0100 Subject: [PATCH 78/91] Update crukmi.config --- conf/crukmi.config | 5 ----- 1 file changed, 5 deletions(-) diff --git a/conf/crukmi.config b/conf/crukmi.config index 000eda9..4823585 100644 --- a/conf/crukmi.config +++ b/conf/crukmi.config @@ -37,11 +37,6 @@ process { memory = { check_max( 80.GB * task.attempt, 'memory' ) } } - withName: 'SAMTOOLS_MPILEUP' { - cpus = 1 - memory = { 5.GB * task.attempt } - } - } executor { From 800931bff22fb4c466cf41ae0fe8ea60e2f5085f Mon Sep 17 00:00:00 2001 From: SPearce Date: Thu, 1 Sep 2022 11:13:23 +0100 Subject: [PATCH 79/91] Add Sarek specific config --- conf/pipeline/sarek/crukmi.config | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 conf/pipeline/sarek/crukmi.config diff --git a/conf/pipeline/sarek/crukmi.config b/conf/pipeline/sarek/crukmi.config new file mode 100644 index 0000000..0b2aecd --- /dev/null +++ b/conf/pipeline/sarek/crukmi.config @@ -0,0 +1,33 @@ +// Profile config names for nf-core/configs + +params { + // Specific nf-core/configs params + config_profile_description = 'Cancer Research UK Manchester Institute HPC cluster profile provided by nf-core/configs' + config_profile_contact = 'Stephen Kitcatt, Simon Pearce (@skitcattCRUKMI, @sppearce)' + config_profile_url = 'http://scicom.picr.man.ac.uk/projects/user-support/wiki' +} + +// Specific nf-core/sarek process configuration +process { + + withName: 'SAMTOOLS_MPILEUP' { + cpus = 1 + memory = { check_resource( 5.GB * task.attempt) } + } + +} + +def check_resource(obj) { + try { + if (obj.getClass() == nextflow.util.MemoryUnit && obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else if (obj.getClass() == nextflow.util.Duration && obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else if (obj.getClass() == java.lang.Integer) + return Math.min(obj, params.max_cpus as int) + else + return obj + } catch (all) { + println " ### ERROR ### Max params max_memory:'${params.max_memory}', max_time:'${params.max_time}' or max_cpus:'${params.max_cpus}' is not valid! 
Using default value: $obj" + } +} From a8faeda14130ada4323121e6e40ee9ad34b8bdd8 Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:23:07 +0100 Subject: [PATCH 80/91] Update crukmi.config --- conf/pipeline/sarek/crukmi.config | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/conf/pipeline/sarek/crukmi.config b/conf/pipeline/sarek/crukmi.config index 0b2aecd..66a0b85 100644 --- a/conf/pipeline/sarek/crukmi.config +++ b/conf/pipeline/sarek/crukmi.config @@ -16,18 +16,3 @@ process { } } - -def check_resource(obj) { - try { - if (obj.getClass() == nextflow.util.MemoryUnit && obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else if (obj.getClass() == nextflow.util.Duration && obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else if (obj.getClass() == java.lang.Integer) - return Math.min(obj, params.max_cpus as int) - else - return obj - } catch (all) { - println " ### ERROR ### Max params max_memory:'${params.max_memory}', max_time:'${params.max_time}' or max_cpus:'${params.max_cpus}' is not valid! Using default value: $obj" - } -} From 2c738cae33d2f95ffeb3b95002dde93d004ab1ea Mon Sep 17 00:00:00 2001 From: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:24:11 +0100 Subject: [PATCH 81/91] Update crukmi.config --- conf/pipeline/sarek/crukmi.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/pipeline/sarek/crukmi.config b/conf/pipeline/sarek/crukmi.config index 66a0b85..46c734b 100644 --- a/conf/pipeline/sarek/crukmi.config +++ b/conf/pipeline/sarek/crukmi.config @@ -12,7 +12,7 @@ process { withName: 'SAMTOOLS_MPILEUP' { cpus = 1 - memory = { check_resource( 5.GB * task.attempt) } + memory = { 5.GB * task.attempt } } } From 5981b643c5c97f08963633f3497f238be0fa4b06 Mon Sep 17 00:00:00 2001 From: SPearce Date: Thu, 1 Sep 2022 11:47:12 +0100 Subject: [PATCH 82/91] Updated the sarek pipeline config --- pipeline/sarek.config | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 512541e..12676b2 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -9,10 +9,11 @@ */ profiles { - munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" } - uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" } - icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } + crukmi { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/crukmi.config" } eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } + icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } + munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" } + uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" } } From a142ad7eaeab8fab0732a663aae49cd1086bdf3c Mon Sep 17 00:00:00 2001 From: SPearce Date: Thu, 1 Sep 2022 13:55:08 +0100 Subject: [PATCH 83/91] Added link to Sarek specific documentation --- README.md | 1 + docs/pipeline/sarek/crukmi.md | 17 +++++++++++++++++ 2 files 
changed, 18 insertions(+) create mode 100644 docs/pipeline/sarek/crukmi.md diff --git a/README.md b/README.md index 18d8edbf..445467b 100644 --- a/README.md +++ b/README.md @@ -207,6 +207,7 @@ Currently documentation is available for the following pipelines within specific - rnavar - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek + - [Cancer Research UK Manchester Institute](docs/pipeline/sarek/crukmi.md) - [MUNIN](docs/pipeline/sarek/munin.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler diff --git a/docs/pipeline/sarek/crukmi.md b/docs/pipeline/sarek/crukmi.md new file mode 100644 index 0000000..01d030a --- /dev/null +++ b/docs/pipeline/sarek/crukmi.md @@ -0,0 +1,17 @@ +# nf-core/configs: CRUK-MI sarek specific configuration + +Extra specific configuration for sarek pipeline + +## Usage + +To use, run the pipeline with `-profile crukmi`. + +This will download and launch the sarek specific [`crukmi.config`](../../../conf/pipeline/sarek/munin.config) which has been pre-configured with a setup suitable for the Cancer Research UK Manchester Institute cluster (phoenix). + +Example: `nextflow run nf-core/sarek -profile crukmi` + +## Sarek specific configurations for CRUK-MI + +Specific configurations for `CRUK-MI` has been made for sarek. + +- Initial requested resources for SAMTOOLS_MPILEUP are only 5GB and 1 core. From abae5c4bd897b79da97a6fb2ff6c990d6bd04c0b Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Mon, 12 Sep 2022 11:40:43 -0700 Subject: [PATCH 84/91] Decrease AWS Batch queue size --- conf/sage.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/sage.config b/conf/sage.config index bfe1e09..d43e719 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -34,7 +34,7 @@ aws { executor { name = 'awsbatch' // Ensure unlimited queue size on AWS Batch - queueSize = 100000 + queueSize = 500 // Slow down the rate at which AWS Batch jobs accumulate in // the queue (an attempt to prevent orphaned EBS volumes) submitRateLimit = '5 / 1 sec' From a9b62c53e0bece4fb578ff1168f72e820c3be6b9 Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Tue, 13 Sep 2022 17:02:44 +0100 Subject: [PATCH 85/91] Added global institutional and pipeline configs for sbc_sharc --- .github/workflows/main.yml | 1 + README.md | 1 + conf/pipeline/atacseq/sbc_sharc.config | 74 ++++++ conf/pipeline/chipseq/sbc_sharc.config | 74 ++++++ conf/pipeline/rnaseq/sbc_sharc.config | 79 ++++++ conf/pipeline/sarek/sbc_sharc.config | 114 ++++++++ conf/sbc_sharc.config | 59 ++++ docs/sbc_sharc.md | 355 +++++++++++++++++++++++++ nfcore_custom.config | 1 + pipeline/atacseq.config | 13 + pipeline/chipseq.config | 13 + pipeline/rnaseq.config | 1 + pipeline/sarek.config | 1 + 13 files changed, 786 insertions(+) create mode 100644 conf/pipeline/atacseq/sbc_sharc.config create mode 100644 conf/pipeline/chipseq/sbc_sharc.config create mode 100644 conf/pipeline/rnaseq/sbc_sharc.config create mode 100644 conf/pipeline/sarek/sbc_sharc.config create mode 100644 conf/sbc_sharc.config create mode 100644 docs/sbc_sharc.md create mode 100644 pipeline/atacseq.config create mode 100644 pipeline/chipseq.config diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 487c7ee..ca01a15 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -83,6 +83,7 @@ jobs: - "sage" - "sahmri" - "sanger" + - "sbc_sharc" - "seg_globe" - "uct_hpc" - "unibe_ibu" diff --git a/README.md b/README.md index 445467b..0cad49f 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ Currently documentation is 
available for the following systems: - [ROSALIND](docs/rosalind.md) - [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) +- [SBC_SHARC](docs/sbc_sharc.md) - [SEG_GLOBE](docs/seg_globe.md) - [UCT_HPC](docs/uct_hpc.md) - [UNIBE_IBU](docs/unibe_ibu.md) diff --git a/conf/pipeline/atacseq/sbc_sharc.config b/conf/pipeline/atacseq/sbc_sharc.config new file mode 100644 index 0000000..2e987d2 --- /dev/null +++ b/conf/pipeline/atacseq/sbc_sharc.config @@ -0,0 +1,74 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/atacseq + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config + +process { + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/conf/pipeline/chipseq/sbc_sharc.config b/conf/pipeline/chipseq/sbc_sharc.config new file mode 100644 index 0000000..2741453 --- /dev/null +++ b/conf/pipeline/chipseq/sbc_sharc.config @@ -0,0 +1,74 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/chipseq + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config + +process { + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/conf/pipeline/rnaseq/sbc_sharc.config b/conf/pipeline/rnaseq/sbc_sharc.config new file mode 100644 index 0000000..52bf0ff --- /dev/null +++ b/conf/pipeline/rnaseq/sbc_sharc.config @@ -0,0 +1,79 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/rnaseq + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config + +process { + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + + withLabel:process_high_memory { + memory = { check_max( 60.GB * task.attempt, 'memory' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} + diff --git a/conf/pipeline/sarek/sbc_sharc.config b/conf/pipeline/sarek/sbc_sharc.config new file mode 100644 index 0000000..204d73b --- /dev/null +++ b/conf/pipeline/sarek/sbc_sharc.config @@ -0,0 +1,114 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/sarek + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in sarek/conf/base.config + +process { + + + // process labels + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + + withLabel:process_high_memory { + memory = { check_max( 60.GB * task.attempt, 'memory' ) } + } + + + // process name + + withName:'BWAMEM1_MEM|BWAMEM2_MEM' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withName:'FASTP' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + } + + withName:'FASTQC|FASTP|MOSDEPTH|SAMTOOLS_CONVERT' { + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + } + + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|SAMTOOLS_STATS' { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + } + + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|GATK4_GATHERBQSRREPORTS' { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + withName:'GATK4_MARKDUPLICATES' { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + withName:'FREEBAYES|SAMTOOLS_STATS|SAMTOOLS_INDEX|UNZIP' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/conf/sbc_sharc.config b/conf/sbc_sharc.config new file mode 100644 index 0000000..ca41185 --- /dev/null +++ b/conf/sbc_sharc.config @@ -0,0 +1,59 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Base Institutional Configuration + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +params { + + // nf-core specific parameters displayed in header summary of each run + + config_profile_description = 'Sheffield Bioinformatics Core - ShARC' + config_profile_contact = 'Lewis Quayle (l.quayle@sheffield.ac.uk)' + config_profile_url = 'https://docs.hpc.shef.ac.uk/en/latest/sharc/index.html' + + // hpc resource limits + + max_cpus = 16 + max_memory = 64.GB + max_time = 96.h + +} + + +// container engine + +singularity { + + enabled = true + autoMounts = true + // cacheDir = '////' + +} + + +// hpc configuration specific to ShARC + +process { + + // scheduler + + executor = 'sge' + penv = 'smp' + queue = { task.time <= 6.h ? 'shortint.q' : 'all.q' } + clusterOptions = { "-l rmem=${task.memory.toGiga()}G" } + + // error and retry handling + + errorStrategy = { task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish' } + maxRetries = 2 + +} + diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md new file mode 100644 index 0000000..7b055d1 --- /dev/null +++ b/docs/sbc_sharc.md @@ -0,0 +1,355 @@ +# nf-core/configs: Sheffield Bioinformatics Core Facility ShARC Configuration + +The following nf-core pipelines have been successfully configured for use on the the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html): + +- [atacseq](https://nf-co.re/atacseq) +- [chipseq](https://nf-co.re/chipseq) +- [rnaseq](https://nf-co.re/rnaseq) +- [sarek](https://nf-co.re/sarek) + +When using [`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines listed above, the appropriate configuration file from the list below will be loaded automatically: + +- atacseq: [sbc_sharc atacseq config](../conf/pipeline/atacseq/sbc_sharc.config) +- chipseq: [sbc_sharc chipseq config](../conf/pipeline/chipseq/sbc_sharc.config) +- rnaseq: [sbc_sharc rnaseq config](../conf/pipeline/rnaseq/sbc_sharc.config) +- sarek: [sbc_sharc sarek config](../conf/pipeline/sarek/sbc_sharc.config) + +The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. + + +## Using the SBC_ShARC Institutional Configuration Profile + +To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). This will download and launch [`sbc_sharc.config`](../conf/sbc_sharc.config) which has been pre-configured with a setup suitable for the ShARC cluster and will automatically load the appropriate pipeline-specific configuration file. + +For a full guide on how to setup and run Nextflow using nf-core pipelines on ShARC, see the **Running Nextflow with nf-core Pipelines on ShARC** section below. 
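The SGE directives in `conf/sbc_sharc.config` above are closures, so the queue choice and the `-l rmem=` request are re-evaluated from whatever each task ends up asking for. A minimal sketch of how that plays out for one label, with the `check_max()` wrapper omitted for brevity (the label and base values mirror the pipeline configs above, but this exact snippet is illustrative rather than part of the profile):

```groovy
// Illustrative only: same closure style as conf/sbc_sharc.config.
// Attempt 1 requests 6h/8GB and resolves to 'shortint.q';
// a retry doubles the requests (12h/16GB) and falls through to 'all.q'.
process {
    withLabel:process_medium {
        time           = { 6.h * task.attempt }
        memory         = { 8.GB * task.attempt }
        queue          = { task.time <= 6.h ? 'shortint.q' : 'all.q' }
        clusterOptions = { "-l rmem=${task.memory.toGiga()}G" }
    }
}
```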
+ + +## A Note on Singularity Containers + +The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file supports running nf-core pipelines with Singularity containers; Singularity images will be downloaded automatically before execution of the pipeline. + +Please read the **Configure Singularity for use with Nextflow and nf-core** sub-section below. + + +## Running Nextflow with nf-core Pipelines on ShARC + +Nextflow is not currently available on ShARC as an environmental software module. The most simple solution to this issue is to install Nextflow and nf-core using a personal install of miniconda. This guide will describe the main steps, which are to: + +1. Install miniconda as a personal software module +2. Load and configure conda +3. Install Nextflow and nf-core within a conda environment +4. Configure Singularity for use with Nextflow and nf-core +5. Setup your project directory and configure your run +6. Submit your run to the SGE scheduler + + +### 1. Install Miniconda as a Personal Software Module + +Connect to ShARC via SSH and login to a worker node via an interactive session. + +```shell +# login +ssh -X username@sharc.shef.ac.uk + +# request a command line only interactive session - some extra resources prevent issues building conda env later +qrsh -l rmem=4G -pe smp 2 +``` + +Navigate your folder within the data area of the file store. + +```shell +cd /data/$USER +``` + +Download and run the miniconda installer by running the following series of commands: + +```shell +# download the latest installer file +wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh + +# check the hashes match +sha256sum Miniconda3-latest-Linux-x86_64.sh + +# make the file executable +chmod +x Miniconda3-latest-Linux-x86_64.sh + +# run the installer +bash Miniconda3-latest-Linux-x86_64.sh +``` + +The miniconda installer will now run and walk you through the install process. There are two **IMPORTANT** things you must take note of at this point: + +1. You will have to overwrite the default install path when prompted by the miniconda installer to check the install path - the directory to which the install is attempted should be `/data/$USER/miniconda`. + +``` +Miniconda3 will now be installed into this location: +///miniconda3 + + - Press ENTER to confirm the location + - Press CTRL-C to abort the installation + - Or specify a different location below + +[///miniconda3] >>> /data/$USER/miniconda +``` + +2. **DO NOT** initialize miniconda at the end of the install process when prompted as shown here: + +``` +Do you wish the installer to initialize Miniconda3 +by running conda init? [yes|no] +[yes] >>> no +``` + +Once the installer has run, delete the installation script. + +```shell +rm Miniconda3-latest-Linux-x86_64.sh +``` + +Now make a modules folder and module file. + +```shell +# modules folder +mkdir /home/$USER/modules + +# module file +nano /home/$USER/modules/miniconda +``` + +Paste the below into the nano editor that opens upon running the final command. Note that this file is in Tcl not BASh, so environmental variable handing is different from the normal `$USER` for username. + +``` +#%Module10.2##################################################################### +## +## User Data Directory Miniconda module file +## +################################################################################ + +proc ModulesHelp { } { + global version + + puts stderr "Makes a user's personal install of Miniconda available." 
+} + +module-whatis "Makes a user's personal install of Miniconda available." + +# module variables + +set MINICONDA_DIR /data/$env(USER)/miniconda/bin + +prepend-path PATH $MINICONDA_DIR +``` + +Now run the following line to make your personal modules available for loading whenever you login. + +```shell +echo "module use /home/$USER/modules" >> ~/.bashrc +``` + +The last thing to note here is that you should not load the anaconda environmental module available to all HPC users and the personal miniconda module you have just made at the same time. + +For further information on making software available via a custom module file visit: + +[Making software available via a custom module file](https://docs.hpc.shef.ac.uk/en/latest/referenceinfo/environment-modules/creating-custom-modulefiles.html) + + +## 2. Load and Configure Conda + +Run the following commands in order and follow any prompts as appropriate: + +```shell +# load the miniconda module - if not already loaded +module load miniconda + +# disable base environment auto-activation +conda config --set auto_activate_base false + +# add the bioconda and conda-forge channels to conda configuration +conda config --add channels bioconda +conda config --add channels conda-forge + +# set channel_priority to "strict" +conda config --set channel_priority strict + +# ensure conda is up-to-date +conda update conda +``` + + +## 3. Install Nextflow and nf-core within a Conda Environment + +Run the following commands in order and follow any prompts as appropriate: + +```shell +# make the "nf_env" environment (in /home/$USER/.conda/envs/nf_env) +conda create --name nf_env nextflow nf-core + +# activate the environment +source activate nf_env + +# ensure all packages are up-to-date +conda update --all +``` + +You can now test the install has worked by running the following: + +```shell +# test the environment is working +nextflow info + +# test functionality +nextflow run hello +``` + +When you are finished, you can deactivate your conda environment using the command `conda deactivate`. + +Although you should not yet do this, should you wish to unload your personal miniconda module you can do so by running `module unload miniconda`. + +Step 5. describes the process of running an nf-core pipeline using Nextflow. You do not have to have a conda environment active for this part of the process as it will be loaded as part of your submission script, but you should not unload the miniconda module at this point. + + +## 4. Configure Singularity for use with Nextflow and nf-core + +When you run nextflow for the first time, Singularity will create a hidden directory `.singularity` in your `$HOME` directory `/home/$USER` which has very very limited (10GB) space available. It is therefore a good idea to create a directory somewhere else (e.g., `/data/$USER`) with more room and link the locations. To do this, run the following series of commands: + +```shell +# change directory to $HOME +cd $HOME + +# make the directory that will be linked to +mkdir /data/$USER/.singularity + +# link the new directory with the existing one +ln -s /data/$USER/.singularity .singularity +``` + + +## 5. 
Setup your Project and Configure your Run + +Whichever file store you decide to locate your project root directory in, the assumed project sub-directory structure within this guide is as follows: + +``` +/filestore/$USER/ +│ +└── project_root/ + │ + ├── config + ├── params + ├── sample_sheet + └── script +``` + +There are three things you will require to run an nf-core pipeline: + +1. A sample sheet +2. A pipeline launcher parameter configuration file +3. A submission script + +You can find nf-core pipelines by visiting [https://nf-co.re/pipelines](https://nf-co.re/pipelines). Each pipeline page has more information on how to use the pipeline as well as a full description of sample sheet requirements and formatting. + +Your sample sheet should be located inside your `sample_sheet` sub-directory. + +The general launch command in the script template below assumes you have configured your specific run using an nf-core pipeline launcher. For example, the launcher for the nf-core/rnaseq pipeline that can be found [here](https://nf-co.re/launch?pipeline=rnaseq). The parameters specified for your run using the launcher should be saved in a file named `nf-params.json` within the `params` sub-directory of your project root. + +To create your run script, navigate to the `script` sub-directory and run the following: + +```shell +nano nf_submission.sh +``` + +Paste the below into the editor ensuring to change the generic information for your own where indicated in the comment lines: + +```shell +#!/bin/bash + +## SGE scheduler flags + +# job name >>> edit "pipeline_name" for the name of the pipeline you are running e.g. rnaseq <<< +#$ -N nf-pipeline_name + +# specify queue and project for the nextflow driver job >>> keep and edit if using a priority queue else delete both <<< +#$ -q queue_name.q +#$ -P queue_name + +# request resources for the nextflow driver job +#$ -pe smp 1 +#$ -l rmem=2G + +# export environmental variables in current shell environment to job +#$ -V + +# send email >>> edit "username" <<< +#$ -M username@sheffield.ac.uk +#$ -m beas + +# merge standard error stream into the standard output stream +#$ -j y + +# output log file +#$ -o nextflow.log + + +## load miniconda module and activate analysis environment + +module load miniconda +source activate nf_env + + +## define and export variables + +# prevent java vm requesting too much memory and killing run +export NXF_OPTS="-Xms1g -Xmx2g" + +# path to singularity cache +export NXF_SINGULARITY_CACHEDIR="/home/$USER/.singularity" + +# project name >>> edit "project_name" so that it is the name of your project root directory <<< +PROJECT="project_name" + +# project directories >>> edit the name of the "filestore" e.g. fastdata <<< +PARAM_DIR="/filestore/$USER/$PROJECT/params" +CONFIG_DIR="/filestore/$USER/$PROJECT/config" + + +## run command >>> edit "pipeline" and "version" <<< + +nextflow run nf-core/pipeline \ +-r version \ +-profile sbc_sharc \ +-resume \ +-params-file ${PARAM_DIR}/nf-params.json + +``` + +Now save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X". + +**OPTIONAL:** If you have specified a priority access queue in your submission script, you will need a personal configuration to send your jobs and not just your driver script to the appropriate queue. 
Navigate to the `config` sub-directory of your project folder and run the following: + +```shell +nano personal.config +``` + +Then paste the following into the editor, ensuring you enter the correct queue name: + +``` +process { + queue = 'queue-name.q' + clusterOptions = { "-P queue-name -l rmem=${task.memory.toGiga()}G" } +} +``` + +Save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X". + +Now append `-c ${CONFIG_DIR}/personal.config` to the `nextflow run` command on a new line in your submission script. + + +## 6. Submit your Run to the SGE Scheduler + +Once you have fulfilled all of the requirements above, you should be ready to submit your batch job to the SGE scheduler on ShARC. From the project root, type the following: + +```bash +qsub ./scripts/nf_submission.sh +``` + +Your pipeline run should start momentarily. Good Luck! + diff --git a/nfcore_custom.config b/nfcore_custom.config index fae764b..abf163b 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -65,6 +65,7 @@ profiles { sage { includeConfig "${params.custom_config_base}/conf/sage.config" } sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" } sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} + sbc_sharc { includeConfig "${params.custom_config_base}/conf/sbc_sharc.config"} seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"} uct_hpc { includeConfig "${params.custom_config_base}/conf/uct_hpc.config" } unibe_ibu { includeConfig "${params.custom_config_base}/conf/unibe_ibu.config" } diff --git a/pipeline/atacseq.config b/pipeline/atacseq.config new file mode 100644 index 0000000..f205f62 --- /dev/null +++ b/pipeline/atacseq.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/atacseq custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/atacseq folder and imported + * under a profile name here. + */ + +profiles { + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/atacseq/sbc_sharc.config" } +} diff --git a/pipeline/chipseq.config b/pipeline/chipseq.config new file mode 100644 index 0000000..242aa92 --- /dev/null +++ b/pipeline/chipseq.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/chipseq custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/chipseq folder and imported + * under a profile name here. 
+ */ + +profiles { + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/chipseq/sbc_sharc.config" } +} diff --git a/pipeline/rnaseq.config b/pipeline/rnaseq.config index 0486d86..b1d470f 100644 --- a/pipeline/rnaseq.config +++ b/pipeline/rnaseq.config @@ -11,5 +11,6 @@ profiles { eddie { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/eddie.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/mpcdf.config" } + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/sbc_sharc.config" } utd_sysbio { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/utd_sysbio.config" } } diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 12676b2..3c087aa 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,5 +15,6 @@ profiles { eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" } + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/sbc_sharc.config" } uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" } } From 9fef35d7d3b4153eeade3af17fc08769bab000fb Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Tue, 13 Sep 2022 17:08:56 +0100 Subject: [PATCH 86/91] Updated pipeline configs for atacseq and chipseq --- conf/pipeline/atacseq/sbc_sharc.config | 2 +- conf/pipeline/chipseq/sbc_sharc.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/atacseq/sbc_sharc.config b/conf/pipeline/atacseq/sbc_sharc.config index 2e987d2..e50695c 100644 --- a/conf/pipeline/atacseq/sbc_sharc.config +++ b/conf/pipeline/atacseq/sbc_sharc.config @@ -11,7 +11,7 @@ */ -// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config +// process-specific resource requirements - reduced specification from those in atacseq/conf/base.config process { diff --git a/conf/pipeline/chipseq/sbc_sharc.config b/conf/pipeline/chipseq/sbc_sharc.config index 2741453..60912f3 100644 --- a/conf/pipeline/chipseq/sbc_sharc.config +++ b/conf/pipeline/chipseq/sbc_sharc.config @@ -11,7 +11,7 @@ */ -// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config +// process-specific resource requirements - reduced specification from those in chipseq/conf/base.config process { From 61ae4636061d4f1296f363cc0929fb5e301bc5c7 Mon Sep 17 00:00:00 2001 From: Lewis Quayle <90088916+lquayle88@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:13:57 +0100 Subject: [PATCH 87/91] Updated sbc_sharc.md --- docs/sbc_sharc.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md index 7b055d1..7d5bcd3 100644 --- a/docs/sbc_sharc.md +++ b/docs/sbc_sharc.md @@ -2,17 +2,17 @@ The following nf-core pipelines have been successfully configured for use on the the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html): -- [atacseq](https://nf-co.re/atacseq) -- [chipseq](https://nf-co.re/chipseq) -- [rnaseq](https://nf-co.re/rnaseq) -- [sarek](https://nf-co.re/sarek) +- [nf-co.re/atacseq](https://nf-co.re/atacseq) +- [nf-co.re/chipseq](https://nf-co.re/chipseq) +- [nf-co.re/rnaseq](https://nf-co.re/rnaseq) +- [nf-co.re/sarek](https://nf-co.re/sarek) When using 
[`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines listed above, the appropriate configuration file from the list below will be loaded automatically: -- atacseq: [sbc_sharc atacseq config](../conf/pipeline/atacseq/sbc_sharc.config) -- chipseq: [sbc_sharc chipseq config](../conf/pipeline/chipseq/sbc_sharc.config) -- rnaseq: [sbc_sharc rnaseq config](../conf/pipeline/rnaseq/sbc_sharc.config) -- sarek: [sbc_sharc sarek config](../conf/pipeline/sarek/sbc_sharc.config) +- [atacseq sbc_sharc.config](../conf/pipeline/atacseq/sbc_sharc.config) +- [chipseq sbc_sharc.config](../conf/pipeline/chipseq/sbc_sharc.config) +- [rnaseq sbc_sharc.config](../conf/pipeline/rnaseq/sbc_sharc.config) +- [sarek sbc_sharc.config](../conf/pipeline/sarek/sbc_sharc.config) The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. From 3d3ee391beac2d24da61f50d29d38c7295a0bdac Mon Sep 17 00:00:00 2001 From: Bruno Grande Date: Tue, 13 Sep 2022 13:15:30 -0700 Subject: [PATCH 88/91] Limit the number of parallel transfers --- conf/sage.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/sage.config b/conf/sage.config index d43e719..4692ed2 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -30,6 +30,9 @@ aws { client { uploadChunkSize = 209715200 } + batch { + maxParallelTransfers = 1 + } } executor { name = 'awsbatch' From 91bfa7341f6a79e36eb8610350230ab2c92d4cef Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Wed, 14 Sep 2022 08:42:40 +0100 Subject: [PATCH 89/91] Made changes requested for PR --- README.md | 7 + conf/sbc_sharc.config | 2 - docs/pipeline/atacseq/sbc_sharc.md | 11 + docs/pipeline/chipseq/sbc_sharc.md | 11 + docs/pipeline/rnaseq/sbc_sharc.md | 11 + docs/pipeline/sarek/sbc_sharc.md | 11 + docs/sbc_sharc.md | 326 +---------------------------- 7 files changed, 58 insertions(+), 321 deletions(-) create mode 100644 docs/pipeline/atacseq/sbc_sharc.md create mode 100644 docs/pipeline/chipseq/sbc_sharc.md create mode 100644 docs/pipeline/rnaseq/sbc_sharc.md create mode 100644 docs/pipeline/sarek/sbc_sharc.md diff --git a/README.md b/README.md index 0cad49f..4544b05 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,10 @@ Currently documentation is available for the following pipelines within specific - ampliseq - [BINAC](docs/pipeline/ampliseq/binac.md) - [UPPMAX](docs/pipeline/ampliseq/uppmax.md) +- atacseq + - [SBC_SHARC](docs/pipeline/atacseq/sbc_sharc.md) +- chipseq + - [SBC_SHARC](docs/pipeline/chipseq/sbc_sharc.md) - eager - [EVA](docs/pipeline/eager/eva.md) - mag @@ -205,11 +209,14 @@ Currently documentation is available for the following pipelines within specific - rnafusion - [HASTA](docs/pipeline/rnafusion/hasta.md) - [MUNIN](docs/pipeline/rnafusion/munin.md) +- rnaseq + - [SBC_SHARC](docs/pipeline/rnaseq/sbc_sharc.md) - rnavar - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek - [Cancer Research UK Manchester Institute](docs/pipeline/sarek/crukmi.md) - [MUNIN](docs/pipeline/sarek/munin.md) + - [SBC_SHARC](docs/pipeline/sarek/sbc_sharc.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler - [EVA](docs/pipeline/taxprofiler/eva.md) diff --git a/conf/sbc_sharc.config b/conf/sbc_sharc.config index ca41185..20b8661 100644 --- a/conf/sbc_sharc.config +++ b/conf/sbc_sharc.config @@ -34,7 +34,6 @@ singularity { enabled = true autoMounts = true - // cacheDir = 
'////' } @@ -56,4 +55,3 @@ process { maxRetries = 2 } - diff --git a/docs/pipeline/atacseq/sbc_sharc.md b/docs/pipeline/atacseq/sbc_sharc.md new file mode 100644 index 0000000..f73d79b --- /dev/null +++ b/docs/pipeline/atacseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: ATAC-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/atacseq](https://nf-co.re/atacseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/atacseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/atacseq -profile sbc_sharc` diff --git a/docs/pipeline/chipseq/sbc_sharc.md b/docs/pipeline/chipseq/sbc_sharc.md new file mode 100644 index 0000000..31baba1 --- /dev/null +++ b/docs/pipeline/chipseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: ChIP-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/chipseq](https://nf-co.re/chipseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/chipseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/chipseq -profile sbc_sharc` diff --git a/docs/pipeline/rnaseq/sbc_sharc.md b/docs/pipeline/rnaseq/sbc_sharc.md new file mode 100644 index 0000000..562f84d --- /dev/null +++ b/docs/pipeline/rnaseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: RNA-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/rnaseq](https://nf-co.re/rnaseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/rnaseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/rnaseq -profile sbc_sharc` diff --git a/docs/pipeline/sarek/sbc_sharc.md b/docs/pipeline/sarek/sbc_sharc.md new file mode 100644 index 0000000..361be18 --- /dev/null +++ b/docs/pipeline/sarek/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: Sarek Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/sarek](https://nf-co.re/sarek) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). 
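Because the shared profile no longer pins a Singularity `cacheDir` (the commented-out line is dropped in the `conf/sbc_sharc.config` hunk above), a per-user location can be supplied either through the `NXF_SINGULARITY_CACHEDIR` environment variable or in a small personal config passed with `-c`. A sketch of the latter, assuming the `/data/$USER/.singularity` directory recommended in `docs/sbc_sharc.md` (the path is an assumption, not part of the profile):

```groovy
// Hypothetical personal.config layered on top of -profile sbc_sharc.
// Point the image cache at the larger /data filestore rather than $HOME.
singularity {
    enabled    = true
    autoMounts = true
    cacheDir   = "/data/${System.getenv('USER')}/.singularity"
}
```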
+ +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/sarek/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/sarek -profile sbc_sharc` diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md index 7d5bcd3..a40b7ea 100644 --- a/docs/sbc_sharc.md +++ b/docs/sbc_sharc.md @@ -1,5 +1,12 @@ # nf-core/configs: Sheffield Bioinformatics Core Facility ShARC Configuration + +## Using the SBC_ShARC Institutional Configuration Profile + +To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch [`sbc_sharc.config`](../conf/sbc_sharc.config) which has been pre-configured with a setup suitable for the ShARC cluster and will automatically load the appropriate pipeline-specific configuration file. + The following nf-core pipelines have been successfully configured for use on the the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html): - [nf-co.re/atacseq](https://nf-co.re/atacseq) @@ -17,199 +24,10 @@ When using [`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines lis The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. -## Using the SBC_ShARC Institutional Configuration Profile - -To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). This will download and launch [`sbc_sharc.config`](../conf/sbc_sharc.config) which has been pre-configured with a setup suitable for the ShARC cluster and will automatically load the appropriate pipeline-specific configuration file. - -For a full guide on how to setup and run Nextflow using nf-core pipelines on ShARC, see the **Running Nextflow with nf-core Pipelines on ShARC** section below. - - ## A Note on Singularity Containers The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file supports running nf-core pipelines with Singularity containers; Singularity images will be downloaded automatically before execution of the pipeline. -Please read the **Configure Singularity for use with Nextflow and nf-core** sub-section below. - - -## Running Nextflow with nf-core Pipelines on ShARC - -Nextflow is not currently available on ShARC as an environmental software module. The most simple solution to this issue is to install Nextflow and nf-core using a personal install of miniconda. This guide will describe the main steps, which are to: - -1. Install miniconda as a personal software module -2. Load and configure conda -3. Install Nextflow and nf-core within a conda environment -4. Configure Singularity for use with Nextflow and nf-core -5. Setup your project directory and configure your run -6. Submit your run to the SGE scheduler - - -### 1. Install Miniconda as a Personal Software Module - -Connect to ShARC via SSH and login to a worker node via an interactive session. 
- -```shell -# login -ssh -X username@sharc.shef.ac.uk - -# request a command line only interactive session - some extra resources prevent issues building conda env later -qrsh -l rmem=4G -pe smp 2 -``` - -Navigate your folder within the data area of the file store. - -```shell -cd /data/$USER -``` - -Download and run the miniconda installer by running the following series of commands: - -```shell -# download the latest installer file -wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh - -# check the hashes match -sha256sum Miniconda3-latest-Linux-x86_64.sh - -# make the file executable -chmod +x Miniconda3-latest-Linux-x86_64.sh - -# run the installer -bash Miniconda3-latest-Linux-x86_64.sh -``` - -The miniconda installer will now run and walk you through the install process. There are two **IMPORTANT** things you must take note of at this point: - -1. You will have to overwrite the default install path when prompted by the miniconda installer to check the install path - the directory to which the install is attempted should be `/data/$USER/miniconda`. - -``` -Miniconda3 will now be installed into this location: -///miniconda3 - - - Press ENTER to confirm the location - - Press CTRL-C to abort the installation - - Or specify a different location below - -[///miniconda3] >>> /data/$USER/miniconda -``` - -2. **DO NOT** initialize miniconda at the end of the install process when prompted as shown here: - -``` -Do you wish the installer to initialize Miniconda3 -by running conda init? [yes|no] -[yes] >>> no -``` - -Once the installer has run, delete the installation script. - -```shell -rm Miniconda3-latest-Linux-x86_64.sh -``` - -Now make a modules folder and module file. - -```shell -# modules folder -mkdir /home/$USER/modules - -# module file -nano /home/$USER/modules/miniconda -``` - -Paste the below into the nano editor that opens upon running the final command. Note that this file is in Tcl not BASh, so environmental variable handing is different from the normal `$USER` for username. - -``` -#%Module10.2##################################################################### -## -## User Data Directory Miniconda module file -## -################################################################################ - -proc ModulesHelp { } { - global version - - puts stderr "Makes a user's personal install of Miniconda available." -} - -module-whatis "Makes a user's personal install of Miniconda available." - -# module variables - -set MINICONDA_DIR /data/$env(USER)/miniconda/bin - -prepend-path PATH $MINICONDA_DIR -``` - -Now run the following line to make your personal modules available for loading whenever you login. - -```shell -echo "module use /home/$USER/modules" >> ~/.bashrc -``` - -The last thing to note here is that you should not load the anaconda environmental module available to all HPC users and the personal miniconda module you have just made at the same time. - -For further information on making software available via a custom module file visit: - -[Making software available via a custom module file](https://docs.hpc.shef.ac.uk/en/latest/referenceinfo/environment-modules/creating-custom-modulefiles.html) - - -## 2. 
-
-Run the following commands in order and follow any prompts as appropriate:
-
-```shell
-# load the miniconda module - if not already loaded
-module load miniconda
-
-# disable base environment auto-activation
-conda config --set auto_activate_base false
-
-# add the bioconda and conda-forge channels to conda configuration
-conda config --add channels bioconda
-conda config --add channels conda-forge
-
-# set channel_priority to "strict"
-conda config --set channel_priority strict
-
-# ensure conda is up-to-date
-conda update conda
-```
-
-
-### 3. Install Nextflow and nf-core within a Conda Environment
-
-Run the following commands in order and follow any prompts as appropriate:
-
-```shell
-# make the "nf_env" environment (in /home/$USER/.conda/envs/nf_env)
-conda create --name nf_env nextflow nf-core
-
-# activate the environment
-source activate nf_env
-
-# ensure all packages are up-to-date
-conda update --all
-```
-
-You can now test the install has worked by running the following:
-
-```shell
-# test the environment is working
-nextflow info
-
-# test functionality
-nextflow run hello
-```
-
-When you are finished, you can deactivate your conda environment using the command `conda deactivate`.
-
-Although you should not yet do this, should you wish to unload your personal miniconda module you can do so by running `module unload miniconda`.
-
-Steps 5 and 6 describe the process of running an nf-core pipeline using Nextflow. You do not need to have a conda environment active for this part of the process, as it will be loaded as part of your submission script, but you should not unload the miniconda module at this point.
-
-
-### 4. Configure Singularity for use with Nextflow and nf-core
-
 When you run nextflow for the first time, Singularity will create a hidden directory `.singularity` in your `$HOME` directory `/home/$USER` which has very limited (10GB) space available. It is therefore a good idea to create a directory somewhere else (e.g., `/data/$USER`) with more room and link the locations. To do this, run the following series of commands:
 
 ```shell
@@ -223,133 +41,3 @@ mkdir /data/$USER/.singularity
 ln -s /data/$USER/.singularity .singularity
 ```
-
-### 5. Set up your Project and Configure your Run
-
-Whichever file store you decide to locate your project root directory in, this guide assumes the following project sub-directory structure:
-
-```
-/filestore/$USER/
-│
-└── project_root/
-    │
-    ├── config
-    ├── params
-    ├── sample_sheet
-    └── script
-```
-
-There are three things you will require to run an nf-core pipeline:
-
-1. A sample sheet
-2. A pipeline launcher parameter configuration file
-3. A submission script
-
-You can find nf-core pipelines by visiting [https://nf-co.re/pipelines](https://nf-co.re/pipelines). Each pipeline page has more information on how to use the pipeline as well as a full description of sample sheet requirements and formatting.
-
-Your sample sheet should be located inside your `sample_sheet` sub-directory.
-
-The general launch command in the script template below assumes you have configured your specific run using an nf-core pipeline launcher. For example, the launcher for the nf-core/rnaseq pipeline can be found [here](https://nf-co.re/launch?pipeline=rnaseq). The parameters specified for your run using the launcher should be saved in a file named `nf-params.json` within the `params` sub-directory of your project root.
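-
-For illustration only, a minimal `nf-params.json` for the nf-core/rnaseq pipeline might look something like the snippet below - the values are placeholders, and the exact parameters you need will depend on the pipeline you are running and the options you chose in the launcher:
-
-```
-{
-  "input": "/filestore/username/project_name/sample_sheet/samplesheet.csv",
-  "outdir": "/filestore/username/project_name/results",
-  "genome": "GRCh38"
-}
-```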
-
-To create your run script, navigate to the `script` sub-directory and run the following:
-
-```shell
-nano nf_submission.sh
-```
-
-Paste the below into the editor, making sure to replace the generic information with your own where indicated in the comment lines:
-
-```shell
-#!/bin/bash
-
-## SGE scheduler flags
-
-# job name >>> edit "pipeline_name" for the name of the pipeline you are running e.g. rnaseq <<<
-#$ -N nf-pipeline_name
-
-# specify queue and project for the nextflow driver job >>> keep and edit if using a priority queue else delete both <<<
-#$ -q queue_name.q
-#$ -P queue_name
-
-# request resources for the nextflow driver job
-#$ -pe smp 1
-#$ -l rmem=2G
-
-# export environment variables in the current shell environment to the job
-#$ -V
-
-# send email >>> edit "username" <<<
-#$ -M username@sheffield.ac.uk
-#$ -m beas
-
-# merge standard error stream into the standard output stream
-#$ -j y
-
-# output log file
-#$ -o nextflow.log
-
-
-## load miniconda module and activate analysis environment
-
-module load miniconda
-source activate nf_env
-
-
-## define and export variables
-
-# prevent java vm requesting too much memory and killing run
-export NXF_OPTS="-Xms1g -Xmx2g"
-
-# path to singularity cache
-export NXF_SINGULARITY_CACHEDIR="/home/$USER/.singularity"
-
-# project name >>> edit "project_name" so that it is the name of your project root directory <<<
-PROJECT="project_name"
-
-# project directories >>> edit the name of the "filestore" e.g. fastdata <<<
-PARAM_DIR="/filestore/$USER/$PROJECT/params"
-CONFIG_DIR="/filestore/$USER/$PROJECT/config"
-
-
-## run command >>> edit "pipeline" and "version" <<<
-
-nextflow run nf-core/pipeline \
--r version \
--profile sbc_sharc \
--resume \
--params-file ${PARAM_DIR}/nf-params.json
-
-```
-
-Now save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X".
-
-**OPTIONAL:** If you have specified a priority access queue in your submission script, you will need a personal configuration file so that your pipeline jobs, and not just your driver job, are sent to the appropriate queue. Navigate to the `config` sub-directory of your project folder and run the following:
-
-```shell
-nano personal.config
-```
-
-Then paste the following into the editor, ensuring you enter the correct queue name:
-
-```
-process {
-  queue = 'queue-name.q'
-  clusterOptions = { "-P queue-name -l rmem=${task.memory.toGiga()}G" }
-}
-```
-
-Save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X".
-
-Now append `-c ${CONFIG_DIR}/personal.config` to the `nextflow run` command on a new line in your submission script (remember to add a trailing `\` to the previous line).
-
-
-### 6. Submit your Run to the SGE Scheduler
-
-Once you have fulfilled all of the requirements above, you should be ready to submit your batch job to the SGE scheduler on ShARC. From the project root, type the following:
-
-```bash
-qsub ./script/nf_submission.sh
-```
-
-Your pipeline run should start momentarily. Good luck!
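-
-If you would like to keep an eye on your run, you can, for example, check the state of your submitted jobs with SGE's `qstat` and follow the output of the Nextflow driver job in the log file named in your submission script:
-
-```shell
-# list your running and queued jobs
-qstat -u $USER
-
-# follow the driver job output from the project root
-tail -f nextflow.log
-```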
- From 32403b6222676adf4fd0b5d8b7f7d27478d028d2 Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Wed, 14 Sep 2022 08:46:11 +0100 Subject: [PATCH 90/91] Updated pipeline config docs for atacseq, chipseq and rnaseq --- docs/pipeline/atacseq/sbc_sharc.md | 2 +- docs/pipeline/chipseq/sbc_sharc.md | 2 +- docs/pipeline/rnaseq/sbc_sharc.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/pipeline/atacseq/sbc_sharc.md b/docs/pipeline/atacseq/sbc_sharc.md index f73d79b..1f33453 100644 --- a/docs/pipeline/atacseq/sbc_sharc.md +++ b/docs/pipeline/atacseq/sbc_sharc.md @@ -6,6 +6,6 @@ Specific configuration for [nf-co.re/atacseq](https://nf-co.re/atacseq) pipeline To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). -This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/atacseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. +This will download and launch the atacseq specific [`sbc_sharc.config`](../../../conf/pipeline/atacseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. Example: `nextflow run nf-core/atacseq -profile sbc_sharc` diff --git a/docs/pipeline/chipseq/sbc_sharc.md b/docs/pipeline/chipseq/sbc_sharc.md index 31baba1..4280db9 100644 --- a/docs/pipeline/chipseq/sbc_sharc.md +++ b/docs/pipeline/chipseq/sbc_sharc.md @@ -6,6 +6,6 @@ Specific configuration for [nf-co.re/chipseq](https://nf-co.re/chipseq) pipeline To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). -This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/chipseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. +This will download and launch the chipseq specific [`sbc_sharc.config`](../../../conf/pipeline/chipseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. Example: `nextflow run nf-core/chipseq -profile sbc_sharc` diff --git a/docs/pipeline/rnaseq/sbc_sharc.md b/docs/pipeline/rnaseq/sbc_sharc.md index 562f84d..d62fe25 100644 --- a/docs/pipeline/rnaseq/sbc_sharc.md +++ b/docs/pipeline/rnaseq/sbc_sharc.md @@ -6,6 +6,6 @@ Specific configuration for [nf-co.re/rnaseq](https://nf-co.re/rnaseq) pipeline To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). -This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/rnaseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. 
+This will download and launch the rnaseq specific [`sbc_sharc.config`](../../../conf/pipeline/rnaseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. Example: `nextflow run nf-core/rnaseq -profile sbc_sharc` From be356eb400c8dce34c1638d5754b44ad7d167efa Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Wed, 14 Sep 2022 10:24:56 +0100 Subject: [PATCH 91/91] Ran prettier on docs/sbc_sharc.md --- docs/sbc_sharc.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md index a40b7ea..f82b348 100644 --- a/docs/sbc_sharc.md +++ b/docs/sbc_sharc.md @@ -1,6 +1,5 @@ # nf-core/configs: Sheffield Bioinformatics Core Facility ShARC Configuration - ## Using the SBC_ShARC Institutional Configuration Profile To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). @@ -23,7 +22,6 @@ When using [`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines lis The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. - ## A Note on Singularity Containers The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file supports running nf-core pipelines with Singularity containers; Singularity images will be downloaded automatically before execution of the pipeline. @@ -40,4 +38,3 @@ mkdir /data/$USER/.singularity # link the new directory with the existing one ln -s /data/$USER/.singularity .singularity ``` -