Merge pull request #309 from kevbrick/master

Added config for NIH Biowulf Cluster
2024-12-22 02:38:16 +00:00 · 2021-12-20 13:05:19 -05:00 · 2021-12-20 13:05:19 -05:00 · 1abb1c2d95
commit 1abb1c2d95
parent 38295dfbbd 82c7d553a7
5 changed files with 70 additions and 0 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -54,6 +54,7 @@ jobs:
            - 'mpcdf'
            - 'munin'
            - 'nu_genomics'
+            - 'nihbiowulf'
            - 'oist'
            - 'pasteur'
            - 'phoenix'
--- a/README.md
+++ b/README.md
@ -119,6 +119,7 @@ Currently documentation is available for the following systems:
 * [MPCDF](docs/mpcdf.md)
 * [MUNIN](docs/munin.md)
 * [NU_GENOMICS](docs/nu_genomics.md)
+* [NIHBIOWULF](docs/nihbiowulf.md)
 * [OIST](docs/oist.md)
 * [PASTEUR](docs/pasteur.md)
 * [PHOENIX](docs/phoenix.md)
--- a/conf/nihbiowulf.config
+++ b/conf/nihbiowulf.config
@ -0,0 +1,42 @@
+//Profile config names for nf-core/configs
+params {
+  config_profile_description = 'National Institutes of Health, USA: Biowulf nf-core config'
+  config_profile_contact = 'Kevin Brick (@kevbrick)'
+  config_profile_url = 'https://hpc.nih.gov/apps/nextflow.html'
+  max_memory = 224.GB	
+  max_cpus = 32
+  max_time = 72.h
+
+  igenomes_base = '/fdb/igenomes/'
+}
+
+process {
+  scratch = '/lscratch/$SLURM_JOBID'
+  maxForks = 100
+}
+
+profiles {
+  local {
+    process.executor = 'local'
+  }
+  
+  slurm {
+    process.executor = 'slurm'
+    executor.$slurm.pollInterval = '1 min'
+    executor.$slurm.queueStatInterval = '5 min'
+    executor.queueSize = 100
+    executor.$slurm.submitRateLimit = '6/1min'
+    process.clusterOptions = ' --gres=lscratch:600 --signal USR2@20'
+  }
+}
+	
+singularity {
+  enabled = true
+  autoMounts = true
+  envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
+  
+  //As names change, the bind paths required may change too. To check that everything is being captured:
+  //Run '. /usr/local/current/singularity/app_conf/sing_binds' to populate $SINGULARITY_BINDPATH
+  //Check that each folder in $SINGULARITY_BINDPATH is listed with -B in the runOptions below. If not, add it.
+  runOptions = ' -B /gs10 -B /gs11 -B /gs12 -B /gs4 -B /gs6 -B /gs7 -B /gs8 -B /gs9 -B /vf -B /spin1 -B /data -B /fdb -B /lscratch -B /fdb/igenomes/ --env TMPDIR="/lscratch/$SLURM_JOBID" '
+}
--- a/docs/nihbiowulf.md
+++ b/docs/nihbiowulf.md
@ -0,0 +1,25 @@
+# nf-core/configs: nihbiowulf Configuration
+
+nf-core pipelines have been configured for use on the Biowulf cluster at the NIH.
+
+To use, run the pipeline with `-profile nihbiowulf`. This will download and launch the [`nihbiowulf.config`](../conf/nihbiowulf.config) which has been pre-configured with a setup suitable for the Biowulf cluster at NIH. Using this profile, a Docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
+
+## Loading the required environment modules
+
+Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on Biowulf. You can do this by issuing the commands below:
+
+```bash
+## Load Nextflow and Singularity environment modules
+module purge
+module load nextflow
+module load singularity
+```
+
+## iGenomes specific configuration
+
+A partial local copy of the iGenomes resource is available on Biowulf. This is a copy of the Illumina iGenomes resource from several years ago and is not up-to-date with the S3-hosted nf-core iGenomes. There are some structural differences of note. In particular, if using BWA, the `igenomes.config` should be modified to specify the BWA version folder, otherwise the BWA module will fail to find an appropriate index. To date, this is the only known issue; however, functionality has not been extensively tested with iGenomes on Biowulf. Nonetheless, you should, in theory, be able to run the pipeline against any reference available in the `igenomes.config` specific to the nf-core pipeline.
+
+You can do this by simply using the `--genome <GENOME_ID>` parameter.
+
+>NB: You will need an account to use the HPC cluster on Biowulf in order to run the pipeline. If in doubt contact CIT.
+>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster. The master process submitting jobs should be run either as a batch job or on an interactive node - not on the Biowulf login node. If in doubt contact Biowulf staff.
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@ -47,6 +47,7 @@ profiles {
  maestro      { includeConfig "${params.custom_config_base}/conf/maestro.config" }
  mpcdf        { includeConfig "${params.custom_config_base}/conf/mpcdf.config" }
  munin        { includeConfig "${params.custom_config_base}/conf/munin.config" }
+  nihbiowulf   { includeConfig "${params.custom_config_base}/conf/nihbiowulf.config" }
  nu_genomics  { includeConfig "${params.custom_config_base}/conf/nu_genomics.config" }
  oist         { includeConfig "${params.custom_config_base}/conf/oist.config" }
  pasteur      { includeConfig "${params.custom_config_base}/conf/pasteur.config" }