Merge branch 'master' into gis-branch

2024-11-23 08:49:54 +00:00 · 2022-10-04 21:59:27 +02:00 · 2022-10-04 21:59:27 +02:00 · 8e217589b7
commit 8e217589b7
parent 34441a8f82 a9781042c7
11 changed files with 87 additions and 12 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -69,6 +69,7 @@ jobs:
          - "jax"
          - "ku_sund_dangpu"
          - "lugh"
+          - "mana"
          - "marvin"
          - "medair"
          - "mjolnir_globe"
--- a/README.md
+++ b/README.md
@ -125,6 +125,7 @@ Currently documentation is available for the following systems:
 - [KU SUND DANGPU](docs/ku_sund_dangpu.md)
 - [LUGH](docs/lugh.md)
 - [MAESTRO](docs/maestro.md)
+- [Mana](docs/mana.md)
 - [MARVIN](docs/marvin.md)
 - [MEDAIR](docs/medair.md)
 - [MJOLNIR_GLOBE](docs/mjolnir_globe.md)
--- a/conf/biohpc_gen.config
+++ b/conf/biohpc_gen.config
@ -11,9 +11,8 @@ env {

 process {
  executor = 'slurm'
-  queue = { task.memory <= 1536.GB ? (task.time > 2.d || task.memory > 384.GB ? 'biohpc_gen_production' : 'biohpc_gen_normal') : 'biohpc_gen_highmem' }
-  beforeScript = 'module use /dss/dsslegfs02/pn73se/pn73se-dss-0000/spack/modules/x86_avx2/linux*'
-  module = 'charliecloud/0.22:miniconda3'
+  queue    = { task.memory <= 1536.GB ? (task.time > 2.d || task.memory > 384.GB ? 'biohpc_gen_production' : 'biohpc_gen_normal') : 'biohpc_gen_highmem' }
+  module   = 'charliecloud/0.25'
 }

 charliecloud {
@ -21,7 +20,7 @@ charliecloud {
 }

 params {
-  params.max_time = 14.d
-  params.max_cpus = 80
+  params.max_time   = 14.d
+  params.max_cpus   = 80
  params.max_memory = 3.TB
 }
--- a/conf/ccga_med.config
+++ b/conf/ccga_med.config
@ -13,8 +13,8 @@ params {

 singularity {
  enabled = true
-  runOptions = "-B /work_ifs -B /scratch"
-  cacheDir = "/work_ifs/ikmb_repository/singularity_cache/"
+  runOptions = "-B /work_ifs -B /scratch -B /work_beegfs"
+  cacheDir = "/work_beegfs/ikmb_repository/singularity_cache/"
 }

 executor {
@ -31,7 +31,7 @@ process {

 params {
  // illumina iGenomes reference file paths on RZCluster
-  igenomes_base = '/work_ifs/ikmb_repository/references/iGenomes/references/'
+  igenomes_base = '/work_beegfs/ikmb_repository/references/iGenomes/references/'
  saveReference = true
  max_memory = 250.GB
  max_cpus = 24
--- a/conf/mana.config
+++ b/conf/mana.config
@ -0,0 +1,21 @@
+params {
+  config_profile_description = 'University of Hawaii at Manoa'
+  config_profile_url = 'http://www.hawaii.edu/its/ci/'
+  config_profile_contact = 'Cedric Arisdakessian'
+
+  max_memory = 400.GB
+  max_cpus = 96
+  max_time = 72.h
+}
+
+process {
+  executor = 'slurm'
+  queue = 'shared,exclusive,kill-shared,kill-exclusive'
+  module = 'tools/Singularity'
+}
+
+singularity {
+  enabled = true
+  cacheDir = "$HOME/.singularity_images_cache"
+  autoMounts = true
+}
--- a/conf/pipeline/mag/eva.config
+++ b/conf/pipeline/mag/eva.config
@ -4,6 +4,11 @@ params {
  config_profile_description = 'nf-core/mag EVA profile provided by nf-core/configs'
 }

+env {
+    OPENBLAS_NUM_THREADS=1
+    OMP_NUM_THREADS=1
+}
+
 process {

    withName: FASTQC {
--- a/conf/sage.config
+++ b/conf/sage.config
@ -29,9 +29,12 @@ aws {
  region = "us-east-1"
  client {
    uploadChunkSize = 209715200
+    uploadMaxThreads = 4
  }
  batch {
    maxParallelTransfers = 1
+    maxTransferAttempts = 5
+    delayBetweenAttempts = '120 sec'
  }
 }
 executor {
--- a/docs/biohpc_gen.md
+++ b/docs/biohpc_gen.md
@ -4,14 +4,12 @@ All nf-core pipelines have been successfully configured for use on the BioHPC Ge

 To use, run the pipeline with `-profile biohpc_gen`. This will download and launch the [`biohpc_gen.config`](../conf/biohpc_gen.config) which has been pre-configured with a setup suitable for the biohpc_gen cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Charliecloud container before execution of the pipeline.

-Before running the pipeline you will need to load Nextflow and Charliecloud using the environment module system on biohpc_gen. You can do this by issuing the commands below:
+Before running the pipeline you will need to load Nextflow and Charliecloud using the environment module system on a login node. You can do this by issuing the commands below:

 ```bash
 ## Load Nextflow and Charliecloud environment modules
-module purge
-module load nextflow charliecloud/0.22
+module load nextflow/21.04.3 charliecloud/0.25
 ```

-> NB: Charliecloud support requires Nextflow version `21.03.0-edge` or later.
 > NB: You will need an account to use the LRZ Linux cluster as well as group access to the biohpc_gen cluster in order to run nf-core pipelines.
 > NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes.
--- a/docs/mana.md
+++ b/docs/mana.md
@ -0,0 +1,45 @@
+# nf-core/configs Mana (at University of Hawaii at Manoa) Configuration
+
+To use, run the pipeline with `-profile mana`. It will use the following parameters for Mana (UHM HPCC):
+
+- Load singularity and use it as default container technology
+- Set up a container cache directory in your home (~/.singularity_images_cache)
+- Select appropriate queues (currently: `shared,exclusive,kill-shared,kill-exclusive`)
+- Set the maximum available resources (as of 09/02/2022):
+  - CPUs: 96
+  - Memory: 400.GB
+  - Time: 72.h
+
+## Pre-requisites
+
+In order to run an nf-core pipeline on Mana, you will need to set up nextflow in your environment.
+At the moment, nextflow is not available as a module (but might be in the future).
+
+### Install nextflow in a conda environment
+
+Before we start, we will need to work on an interactive node (currently, Mana doesn't let you execute any program on the login node):
+
+```bash
+# Request an interactive sandbox node for 30 min
+srun --pty -t 30 -p sandbox /bin/bash
+```
+
+To set up nextflow on your account, follow these steps:
+
+```bash
+# Load the latest anaconda3 module
+module load lang/Anaconda3/2022.05
+
+# Initialize environment
+. $(conda info --base)/etc/profile.d/conda.sh
+
+# Install nextflow (here in base environment, but you can create a new one if you'd like)
+conda install -c bioconda nextflow
+```
+
+If you want these settings to be persistent, you can add the first 2 commands to your `.bash_profile` file like this:
+
+```bash
+echo "module load lang/Anaconda3/2022.05" >> ~/.bash_profile
+echo ". $(conda info --base)/etc/profile.d/conda.sh" >> ~/.bash_profile
+```
--- a/docs/sage.md
+++ b/docs/sage.md
@ -8,6 +8,7 @@ This global configuration includes the following tweaks:
 - Enable retries by default when exit codes relate to insufficient memory
 - Allow pending jobs to finish if the number of retries are exhausted
 - Increase the amount of time allowed for file transfers
+- Improve reliability of file transfers with retries and reduced concurrency
 - Increase the default chunk size for multipart uploads to S3
 - Slow down job submission rate to avoid overwhelming any APIs
 - Define the `check_max()` function, which is missing in Sarek v2
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@ -52,6 +52,7 @@ profiles {
  ku_sund_dangpu {includeConfig "${params.custom_config_base}/conf/ku_sund_dangpu.config"}
  lugh         { includeConfig "${params.custom_config_base}/conf/lugh.config" }
  maestro      { includeConfig "${params.custom_config_base}/conf/maestro.config" }
+  mana         { includeConfig "${params.custom_config_base}/conf/mana.config" }
  marvin       { includeConfig "${params.custom_config_base}/conf/marvin.config" }
  medair       { includeConfig "${params.custom_config_base}/conf/medair.config" }
  mjolnir_globe { includeConfig "${params.custom_config_base}/conf/mjolnir_globe.config" }