Merge pull request #14 from nf-core/master

Latest changes into fork
2024-11-22 08:29:54 +00:00 · 2021-02-14 08:50:51 +01:00 · 2021-02-14 08:50:51 +01:00 · f0ba4853df
commit f0ba4853df
parent e32f1e46ff 86566ddd3b
20 changed files with 331 additions and 20 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -16,7 +16,7 @@ jobs:
    needs: test_all_profiles
    strategy:
        matrix:
-          profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'kraken', 'mpcdf', 'munin', 'pasteur', 'phoenix', 'prince', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh']
+          profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'imperial', 'imperial_mb', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh']
    steps:
      - uses: actions/checkout@v1
      - name: Install Nextflow
--- a/README.md
+++ b/README.md
@ -116,9 +116,11 @@ Currently documentation is available for the following systems:
 * [KRAKEN](docs/kraken.md)
 * [MPCDF](docs/mpcdf.md)
 * [MUNIN](docs/munin.md)
+* [OIST](docs/oist.md)
 * [PASTEUR](docs/pasteur.md)
 * [PHOENIX](docs/phoenix.md)
 * [PRINCE](docs/prince.md)
+* [SEG_GLOBE](docs/seg_globe.md)
 * [SHH](docs/shh.md)
 * [UCT_HPC](docs/uct_hpc.md)
 * [UPPMAX](docs/uppmax.md)
--- a/conf/cbe.config
+++ b/conf/cbe.config
@ -9,6 +9,7 @@ process {
  executor = 'slurm'
  queue = { task.memory <= 170.GB ? 'c' : 'm' }
  clusterOptions = { task.time <= 8.h ? '--qos short': task.time <= 48.h ? '--qos medium' : '--qos long' }
+  module = 'anaconda3/2019.10'
 }

 singularity {
--- a/conf/ebc.config
+++ b/conf/ebc.config
@ -8,7 +8,7 @@
    cleanup = true

    conda {
-        cacheDir = '/ebc_data/nf-core/conda'
+        cacheDir = '/gpfs/space/GI/ebc_data/software/nf-core/conda'
    }
    process {
        executor = 'slurm'
@ -16,7 +16,7 @@
        beforeScript = 'module load nextflow'
    }
    executor {
-        queueSize = 16
+        queueSize = 64
    }
    params {
      max_memory = 12.GB
--- a/conf/imperial.config
+++ b/conf/imperial.config
@ -0,0 +1,37 @@
+//Profile config names for nf-core/configs
+
+params {
+  // Config Params
+  config_profile_description = 'Imperial College London - HPC Profile -- provided by nf-core/configs.'
+  config_profile_contact = 'Combiz Khozoie (c.khozoie@imperial.ac.uk)'
+  config_profile_url = 'https://www.imperial.ac.uk/admin-services/ict/self-service/research-support/rcs/'
+
+  // Resources
+  max_memory = 256.GB
+  max_cpus = 32
+  max_time = 72.h
+}
+
+executor {
+  $pbspro {
+    queueSize = 50
+  }
+
+  $local {
+     cpus = 2
+     queueSize = 1
+     memory = '32 GB'
+   }
+}
+
+singularity {
+  enabled = true
+  autoMounts = true
+  runOptions = "-B /rds/,/rdsgpfs/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp"
+}
+
+process {
+
+  executor = 'pbspro'
+
+}
--- a/conf/imperial_mb.config
+++ b/conf/imperial_mb.config
@ -0,0 +1,44 @@
+//Profile config names for nf-core/configs
+
+params {
+  // Config Params
+  config_profile_description = 'Imperial College London - MEDBIO QUEUE - HPC Profile -- provided by nf-core/configs.'
+  config_profile_contact = 'Combiz Khozoie (c.khozoie@imperial.ac.uk)'
+  config_profile_url = 'https://www.imperial.ac.uk/bioinformatics-data-science-group/resources/uk-med-bio/'
+
+  // Resources
+  max_memory = 640.GB
+  max_cpus = 32
+  max_time = 168.h
+}
+
+executor {
+  $pbspro {
+    queueSize = 50
+  }
+
+  $local {
+    cpus = 2
+    queueSize = 1
+    memory = '32 GB'
+  }
+}
+
+singularity {
+  enabled = true
+  autoMounts = true
+  runOptions = "-B /rds/,/rdsgpfs/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp"
+}
+
+process {
+
+  executor = 'pbspro'
+  queue = 'pqmedbio-tput'
+
+  //queue = 'med-bio' //!! this is an alias and shouldn't be used
+
+  withLabel:process_large {
+    queue = 'pqmedbio-large'
+  }
+
+}
--- a/conf/oist.config
+++ b/conf/oist.config
@ -0,0 +1,22 @@
+//Profile config names for nf-core/configs
+params {
+  config_profile_description = 'The Okinawa Institute of Science and Technology Graduate University (OIST) HPC cluster profile provided by nf-core/configs.'
+  config_profile_contact = 'OISTs Bioinformatics User Group <BioinfoUgrp@oist.jp>'
+  config_profile_url = 'https://github.com/nf-core/configs/blob/master/docs/oist.md'
+}
+
+singularity {
+  enabled = true
+}
+
+process {
+  executor = 'slurm'
+  queue = 'compute'
+  clusterOptions = '-C zen2'
+}
+
+params {
+  max_memory = 500.GB
+  max_cpus = 128
+  max_time = 90.h
+}
--- a/conf/pipeline/ampliseq/uppmax.config
+++ b/conf/pipeline/ampliseq/uppmax.config
@ -1,15 +1,20 @@
 // Profile config names for nf-core/configs
-
 params {
  // Specific nf-core/configs params
  config_profile_contact = 'Daniel Lundin (daniel.lundin@lnu.se)'
  config_profile_description = 'nf-core/ampliseq UPPMAX profile provided by nf-core/configs'
 }

-withName: make_SILVA_132_16S_classifier {
-  clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" }
-}
+process {
+  withName: classifier_extract_seq {
+    clusterOptions = { "-A $params.project -p core -n 1 -t 7-00:00:00 ${params.clusterOptions ?: ''}" }
+  }

-withName: classifier {
-  clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" }
+  withName: classifier_train {
+    clusterOptions = { "-A $params.project -C fat -p node -N 1 -t 24:00:00 ${params.clusterOptions ?: ''}" }
+  }
+
+  withName: classifier {
+    clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" }
+  }
 }
--- a/conf/pipeline/scflow/imperial.config
+++ b/conf/pipeline/scflow/imperial.config
@ -0,0 +1,18 @@
+// scflow/imperial specific profile config
+
+params {
+  // Config Params
+  config_profile_description = 'Imperial College London - HPC - nf-core/scFlow Profile -- provided by nf-core/configs.'
+  config_profile_contact = 'Combiz Khozoie (c.khozoie@imperial.ac.uk)'
+
+  // Analysis Resource Params
+  ctd_folder = "/rds/general/user/$USER/projects/ukdrmultiomicsproject/live/Analyses/scFlowResources/refs/ctd"
+  ensembl_mappings = "/rds/general/user/$USER/projects/ukdrmultiomicsproject/live/Analyses/scFlowResources/src/ensembl-ids/ensembl_mappings.tsv"
+}
+
+singularity {
+  enabled = true
+  autoMounts = true
+  cacheDir = "/rds/general/user/$USER/projects/ukdrmultiomicsproject/live/.singularity-cache"
+  runOptions = "-B /rds/,/rdsgpfs/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp"
+}
--- a/conf/seg_globe.config
+++ b/conf/seg_globe.config
@ -0,0 +1,27 @@
+//Profile config names for nf-core/configs
+params {
+  config_profile_description = 'Section for Evolutionary Genomics @ GLOBE, University of Copenhagen - seg_globe profile provided by nf-core/configs.'
+  config_profile_contact = 'Aashild Vaagene (@ashildv)'
+  config_profile_url = 'https://globe.ku.dk/research/evogenomics/'
+  max_memory = 250.GB
+  max_cpus = 35
+  max_time = 720.h
+}
+
+singularity {
+  enabled = true
+  autoMounts = true
+  cacheDir = '/shared/volume/hologenomics/data/cache/nf-eager/singularity'
+}
+
+process {
+ executor = 'slurm'
+ queue = { task.time < 24.h ? 'hologenomics-short' : task.time < 168.h ? 'hologenomics' : 'hologenomics-long' }
+}
+        
+cleanup = true
+       
+executor {
+  queueSize = 8
+}
+
--- a/conf/shh.config
+++ b/conf/shh.config
@ -10,6 +10,7 @@ params {
  igenomes_base = "/projects1/public_data/igenomes/"
 }

+// Perform work directory cleanup after a successful run
 cleanup = true

 singularity {
@ -20,7 +21,6 @@ singularity {

 process {
    executor = 'slurm'
-    queue = { task.memory > 756.GB || task.cpus > 64 ? 'supercruncher': task.time <= 2.h ? 'short' : task.time <= 48.h ? 'medium': 'long' }
 }

 executor {
@ -39,9 +39,16 @@ profiles {
  }
  sdag {
    params {
-      config_profile_description = 'SDAG MPI-SHH profile, provided by nf-core/configs.'
-      max_memory = 2.TB
-      max_cpus = 128
+          config_profile_description = 'SDAG MPI-SHH profile, provided by nf-core/configs.'
+          max_memory = 2.TB
+          max_cpus = 128
+      }
+      process {
+          queue = { task.memory > 756.GB || task.cpus > 64 ? 'supercruncher': task.time <= 2.h ? 'short' : task.time <= 48.h ? 'medium': 'long' }
     }
  }
+  // Profile to deactivate automatic cleanup of work directory after a successful run. Overwrites cleanup option.
+  debug {
+    cleanup = false
+  }
 }
--- a/docs/google.md
+++ b/docs/google.md
@ -14,21 +14,21 @@ nextflow run nf-core/rnaseq -profile test,google --google_bucket <gs://your_buck

 ### Required Parameters

-#### `--google-bucket`
+#### `--google_bucket`

 The Google Cloud Storage bucket location to be used as a Nextflow work directory. Can also be specified with (`-w gs://your_bucket/work`).

 ### Optional Parameters

-#### `--google-zone`
+#### `--google_zone`

 The Google zone where the computation is executed in Compute Engine VMs. Multiple zones can be provided separating them by a comma. Default (`europe-west2-c`).

-#### `--google-preemptible`
+#### `--google_preemptible`

 Enables the usage of preemptible virtual machines with a retry error statergy for up to 5 retries. Default (`true`).

-#### `--google-debug`
+#### `--google_debug`

 Copies the /google debug directory from the VM to the task bucket directory. Useful for debugging. Default (`false`).

--- a/docs/imperial.md
+++ b/docs/imperial.md
@ -0,0 +1,16 @@
+# nf-core/configs: Imperial CX1 HPC Configuration
+
+All nf-core pipelines have been successfully configured for use on the CX1 cluster at Imperial College London HPC.
+
+To use, run the pipeline with `-profile imperial`. This will download and launch the [`imperial.config`](../conf/imperial.config) which has been pre-configured with a setup suitable for the CX1 cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
+
+Before running the pipeline you will need to load Nextflow using the environment module system on the CX1 cluster. You can do this by issuing the commands below:
+
+```bash
+## Load the Nextflow environment module
+module load Nextflow
+```
+
+>NB: You will need an account to use the HPC cluster CX1 in order to run the pipeline. If in doubt contact IT.
+>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT.
+>NB: To submit jobs to the Imperial College MEDBIO cluster, use `-profile imperial_mb` instead.
--- a/docs/imperial_mb.md
+++ b/docs/imperial_mb.md
@ -0,0 +1,16 @@
+# nf-core/configs: Imperial MEDBIO HPC Configuration
+
+All nf-core pipelines have been successfully configured for use on the MEDBIO cluster at Imperial College London HPC.
+
+To use, run the pipeline with `-profile imperial_mb`. This will download and launch the [`imperial_mb.config`](../conf/imperial_mb.config) which has been pre-configured with a setup suitable for the MEDBIO cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
+
+Before running the pipeline you will need to load Nextflow using the environment module system on the head node. You can do this by issuing the commands below:
+
+```bash
+## Load the Nextflow environment module
+module load Nextflow
+```
+
+>NB: You will need an account to use the HPC cluster MEDBIO in order to run the pipeline. Access to the MEDBIO queue is exclusive.  If in doubt contact IT.
+>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT.
+>NB: To submit jobs to the standard CX1 cluster at Imperial College, use `-profile imperial` instead.
--- a/docs/oist.md
+++ b/docs/oist.md
@ -0,0 +1,33 @@
+# nf-core/configs: OIST Configuration
+
+The nf-core pipelines [rnaseq](https://nf-co.re/rnaseq) and
+[eager](https://nf-co.re/eager) have been successfully tested on the _Deigo_
+cluster at the Okinawa Institute of Science and Technology Graduate University
+([OIST](https://www.oist.jp)).  We have no reason to expect that other
+pipelines would not work.
+
+To use, run the pipeline with `-profile oist`.  This will download and launch
+the [`oist.config`](../conf/oist.config) which has been pre-configured with a
+setup suitable for _Deigo_.  Using this profile, a docker image containing all
+of the required software will be downloaded, and converted to a Singularity
+image before execution of the pipeline.
+
+## Loading Nextflow and Singularity
+
+Before running the pipeline you will need to load Nextflow and Singularity
+using the environment module system on _Deigo_.  You can do this by issuing the
+commands below:
+
+```bash
+## Load the latest Nextflow and Singularity environment modules
+ml purge
+ml bioinfo-ugrp-modules
+ml Other/Nextflow
+```
+
+>NB: You will need an account to use the _Deigo_ cluster in order to run the
+>pipeline.  If in doubt contact IT.
+>
+>NB: Nextflow will submit the jobs via the SLURM scheduler to the HPC cluster
+>and as such the commands above will have to be executed on one of the login
+>nodes.  If in doubt contact IT.
--- a/docs/pipeline/scflow/imperial.md
+++ b/docs/pipeline/scflow/imperial.md
@ -0,0 +1,21 @@
+# nf-core/configs: Imperial scflow Specific Configuration
+
+Extra specific configuration for the scflow pipeline
+
+## Usage
+
+To use, run the pipeline with `-profile imperial` or `-profile imperial_mb`.
+
+This will download and launch the scflow specific [`imperial.config`](../../../conf/pipeline/scflow/imperial.config) which has been pre-configured with a setup suitable for the Imperial HPC cluster.
+
+Example: `nextflow run nf-core/scflow -profile imperial`
+
+## scflow specific configurations for Imperial
+
+Specific configurations for Imperial have been made for scflow.
+
+* Singularity `enabled` and `autoMounts` set to `true`
+* Singularity `cacheDir` path set to an RDS location
+* Singularity `runOptions` path set to bind (`-B`) RDS paths with container paths.
+* Params `ctd_folder` set to an RDS location.
+* Params `ensembl_mappings` set to an RDS location.
--- a/docs/seg_globe.md
+++ b/docs/seg_globe.md
@ -0,0 +1,21 @@
+# nf-core/configs: Section for Evolutionary Genomics at GLOBE, University of Copenhagen (hologenomics partition on HPC) Configuration
+
+> **NB:** You will need an account to use the HPC cluster to run the pipeline. If in doubt contact IT.
+
+The profile is configured to run with Singularity version 3.6.3-1.el7 which is part of the OS installation and does not need to be loaded as a module.
+
+Before running the pipeline you will need to load Java, miniconda and Nextflow. You can do this by including the commands below in your SLURM/sbatch script:
+
+```bash
+## Load Java and Nextflow environment modules
+module purge
+module load lib
+module load java/v1.8.0_202-jdk miniconda nextflow/v20.07.1.5412
+```
+
+All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory anyway.
+The config contains a `cleanup` command that removes the `work/` directory automatically once the pipeline has completed successfully. If the run does not complete successfully then the `work/` dir should be removed manually to save storage space.
+
+This configuration will automatically choose the correct SLURM queue (`hologenomics-short`, `hologenomics` or `hologenomics-long`) depending on the time required by each process.
+
+> **NB:** Nextflow will need to submit the jobs via SLURM to the HPC cluster and as such the commands above will have to be submitted from one of the login nodes.
--- a/docs/uppmax.md
+++ b/docs/uppmax.md
@ -2,6 +2,10 @@

 All nf-core pipelines have been successfully configured for use on the Swedish UPPMAX clusters.

+## Getting help
+
+We have a Slack channel dedicated to UPPMAX users on the nf-core Slack: [https://nfcore.slack.com/channels/uppmax](https://nfcore.slack.com/channels/uppmax)
+
 ## Using the UPPMAX config profile

 To use, run the pipeline with `-profile uppmax` (one hyphen).
@ -12,14 +16,19 @@ In addition to this config profile, you will also need to specify an UPPMAX proj
 You can do this with the `--project` flag (two hyphens) when launching nextflow. For example:

 ```bash
-nextflow run nf-core/PIPELINE -profile uppmax --project SNIC 2018/1-234 # ..rest of pipeline flags
+nextflow run nf-core/PIPELINE -profile uppmax --project snic2018-1-234 # ..rest of pipeline flags
 ```

+> NB: If you're not sure what your UPPMAX project ID is, try running `groups` or checking SUPR.
+
 Before running the pipeline you will need to either install Nextflow or load it using the environment module system.

-This config enables Nextflow to manage the pipeline jobs via the Slurm job scheduler.
+This config enables Nextflow to manage the pipeline jobs via the Slurm job scheduler and using Singularity for software management.
+
 Just run Nextflow on a login node and it will handle everything else.

+Remember to use `-bg` to launch Nextflow in the background, so that the pipeline doesn't exit if you leave your terminal session.
+
 ## Using iGenomes references

 A local copy of the iGenomes resource has been made available on all UPPMAX clusters so you should be able to run the pipeline against any reference available in the `igenomes.config`.
@ -40,7 +49,7 @@ Note that each job will still start with the same request as normal, but restart

 All jobs will be submitted to fat nodes using this method, so it's only for use in extreme circumstances.

-## How to specify a UPPMAX cluster
+## Different UPPMAX clusters

 The UPPMAX nf-core configuration profile uses the `hostname` of the active environment to automatically apply the following resource limits:

@ -64,3 +73,15 @@ All jobs are limited to 1 hour to be eligible for this queue and only one job al
 It is not suitable for use with real data.

 To use it, submit with `-profile uppmax,devel`.
+
+## Running on Bianca
+
+For security reasons, there is no internet access on Bianca so you can't download from or upload files to the cluster directly. Before running an nf-core pipeline on Bianca you will first have to download the pipeline and singularity images needed elsewhere and transfer them via the wharf area to your Bianca project.
+
+You can follow the guide for downloading pipelines [for offline use](https://nf-co.re/tools#downloading-pipelines-for-offline-use). Note that you will have to download the singularity images as well.
+
+Next transfer the pipeline and the singularity images to your project. Before running the pipeline you will have to indicate to nextflow where the singularity images are located by setting `NXF_SINGULARITY_CACHEDIR`:
+
+`export NXF_SINGULARITY_CACHEDIR=Your_Location_For_The_Singularity_directory/.`
+
+You should now be able to run your nf-core pipeline on Bianca.
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@ -24,6 +24,8 @@ profiles {
  czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
  ebc          { includeConfig "${params.custom_config_base}/conf/ebc.config" }
  icr_davros   { includeConfig "${params.custom_config_base}/conf/icr_davros.config" }
+  imperial     { includeConfig "${params.custom_config_base}/conf/imperial.config" }
+  imperial_mb  { includeConfig "${params.custom_config_base}/conf/imperial_mb.config" }  
  genotoul     { includeConfig "${params.custom_config_base}/conf/genotoul.config" }
  google       { includeConfig "${params.custom_config_base}/conf/google.config" }
  denbi_qbic   { includeConfig "${params.custom_config_base}/conf/denbi_qbic.config" }
@ -33,9 +35,11 @@ profiles {
  kraken       { includeConfig "${params.custom_config_base}/conf/kraken.config" }
  mpcdf        { includeConfig "${params.custom_config_base}/conf/mpcdf.config" }
  munin        { includeConfig "${params.custom_config_base}/conf/munin.config" }
+  oist         { includeConfig "${params.custom_config_base}/conf/oist.config" }
  pasteur      { includeConfig "${params.custom_config_base}/conf/pasteur.config" }
  phoenix      { includeConfig "${params.custom_config_base}/conf/phoenix.config" }
  prince       { includeConfig "${params.custom_config_base}/conf/prince.config" }
+  seg_globe    { includeConfig "${params.custom_config_base}/conf/seg_globe.config"}
  shh          { includeConfig "${params.custom_config_base}/conf/shh.config" }
  uct_hpc      { includeConfig "${params.custom_config_base}/conf/uct_hpc.config" }
  uppmax       { includeConfig "${params.custom_config_base}/conf/uppmax.config" }
@ -54,6 +58,8 @@ params {
    cfc: ['.hpc.uni-tuebingen.de'],
    crick: ['.thecrick.org'],
    icr_davros: ['.davros.compute.estate'],
+    imperial: ['.hpc.ic.ac.uk'],
+    imperial_mb: ['.hpc.ic.ac.uk'],
    genotoul: ['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'],
    genouest: ['.genouest.org'],
    uppmax: ['.uppmax.uu.se'],
--- a/pipeline/scflow.config
+++ b/pipeline/scflow.config
@ -0,0 +1,14 @@
+/*
+ * -------------------------------------------------
+ *  nfcore/scflow custom profile Nextflow config file
+ * -------------------------------------------------
+ * Config options for custom environments.
+ * Cluster-specific config options should be saved
+ * in the conf/pipeline/scflow folder and imported
+ * under a profile name here.
+ */
+
+profiles {
+  imperial { includeConfig "${params.custom_config_base}/conf/pipeline/scflow/imperial.config" }
+  imperial_mb { includeConfig "${params.custom_config_base}/conf/pipeline/scflow/imperial.config" } // intended
+}