diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dfb0782..3e5c930 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: needs: test_all_profiles strategy: matrix: - profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'kraken', 'mpcdf', 'munin', 'pasteur', 'phoenix', 'prince', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] + profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'imperial', 'imperial_mb', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] steps: - uses: actions/checkout@v1 - name: Install Nextflow diff --git a/README.md b/README.md index a967049..45721e8 100644 --- a/README.md +++ b/README.md @@ -116,9 +116,11 @@ Currently documentation is available for the following systems: * [KRAKEN](docs/kraken.md) * [MPCDF](docs/mpcdf.md) * [MUNIN](docs/munin.md) +* [OIST](docs/oist.md) * [PASTEUR](docs/pasteur.md) * [PHOENIX](docs/phoenix.md) * [PRINCE](docs/prince.md) +* [SEG_GLOBE](docs/seg_globe.md) * [SHH](docs/shh.md) * [UCT_HPC](docs/uct_hpc.md) * [UPPMAX](docs/uppmax.md) diff --git a/conf/cbe.config b/conf/cbe.config index 0e9b43e..0a5763f 100755 --- a/conf/cbe.config +++ b/conf/cbe.config @@ -9,6 +9,7 @@ process { executor = 'slurm' queue = { task.memory <= 170.GB ? 'c' : 'm' } clusterOptions = { task.time <= 8.h ? '--qos short': task.time <= 48.h ? 
'--qos medium' : '--qos long' } + module = 'anaconda3/2019.10' } singularity { diff --git a/conf/ebc.config b/conf/ebc.config index 4df0596..8f007ed 100644 --- a/conf/ebc.config +++ b/conf/ebc.config @@ -8,7 +8,7 @@ cleanup = true conda { - cacheDir = '/ebc_data/nf-core/conda' + cacheDir = '/gpfs/space/GI/ebc_data/software/nf-core/conda' } process { executor = 'slurm' @@ -16,7 +16,7 @@ beforeScript = 'module load nextflow' } executor { - queueSize = 16 + queueSize = 64 } params { max_memory = 12.GB diff --git a/conf/imperial.config b/conf/imperial.config new file mode 100644 index 0000000..f40d92b --- /dev/null +++ b/conf/imperial.config @@ -0,0 +1,37 @@ +//Profile config names for nf-core/configs + +params { + // Config Params + config_profile_description = 'Imperial College London - HPC Profile -- provided by nf-core/configs.' + config_profile_contact = 'Combiz Khozoie (c.khozoie@imperial.ac.uk)' + config_profile_url = 'https://www.imperial.ac.uk/admin-services/ict/self-service/research-support/rcs/' + + // Resources + max_memory = 256.GB + max_cpus = 32 + max_time = 72.h +} + +executor { + $pbspro { + queueSize = 50 + } + + $local { + cpus = 2 + queueSize = 1 + memory = '32 GB' + } +} + +singularity { + enabled = true + autoMounts = true + runOptions = "-B /rds/,/rdsgpfs/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp" +} + +process { + + executor = 'pbspro' + +} diff --git a/conf/imperial_mb.config b/conf/imperial_mb.config new file mode 100644 index 0000000..a89ffcd --- /dev/null +++ b/conf/imperial_mb.config @@ -0,0 +1,44 @@ +//Profile config names for nf-core/configs + +params { + // Config Params + config_profile_description = 'Imperial College London - MEDBIO QUEUE - HPC Profile -- provided by nf-core/configs.' 
+ config_profile_contact = 'Combiz Khozoie (c.khozoie@imperial.ac.uk)' + config_profile_url = 'https://www.imperial.ac.uk/bioinformatics-data-science-group/resources/uk-med-bio/' + + // Resources + max_memory = 640.GB + max_cpus = 32 + max_time = 168.h +} + +executor { + $pbspro { + queueSize = 50 + } + + $local { + cpus = 2 + queueSize = 1 + memory = '32 GB' + } +} + +singularity { + enabled = true + autoMounts = true + runOptions = "-B /rds/,/rdsgpfs/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp" +} + +process { + + executor = 'pbspro' + queue = 'pqmedbio-tput' + + //queue = 'med-bio' //!! this is an alias and shouldn't be used + + withLabel:process_large { + queue = 'pqmedbio-large' + } + +} diff --git a/conf/oist.config b/conf/oist.config new file mode 100644 index 0000000..8815ed4 --- /dev/null +++ b/conf/oist.config @@ -0,0 +1,22 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'The Okinawa Institute of Science and Technology Graduate University (OIST) HPC cluster profile provided by nf-core/configs.' 
+ config_profile_contact = 'OISTs Bioinformatics User Group ' + config_profile_url = 'https://github.com/nf-core/configs/blob/master/docs/oist.md' +} + +singularity { + enabled = true +} + +process { + executor = 'slurm' + queue = 'compute' + clusterOptions = '-C zen2' +} + +params { + max_memory = 500.GB + max_cpus = 128 + max_time = 90.h +} diff --git a/conf/pipeline/ampliseq/uppmax.config b/conf/pipeline/ampliseq/uppmax.config index 36a1c3b..2a8bc34 100644 --- a/conf/pipeline/ampliseq/uppmax.config +++ b/conf/pipeline/ampliseq/uppmax.config @@ -1,15 +1,20 @@ // Profile config names for nf-core/configs - params { // Specific nf-core/configs params config_profile_contact = 'Daniel Lundin (daniel.lundin@lnu.se)' config_profile_description = 'nf-core/ampliseq UPPMAX profile provided by nf-core/configs' } -withName: make_SILVA_132_16S_classifier { - clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" } -} +process { + withName: classifier_extract_seq { + clusterOptions = { "-A $params.project -p core -n 1 -t 7-00:00:00 ${params.clusterOptions ?: ''}" } + } -withName: classifier { - clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" } + withName: classifier_train { + clusterOptions = { "-A $params.project -C fat -p node -N 1 -t 24:00:00 ${params.clusterOptions ?: ''}" } + } + + withName: classifier { + clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" } + } } diff --git a/conf/pipeline/scflow/imperial.config b/conf/pipeline/scflow/imperial.config new file mode 100644 index 0000000..7f46466 --- /dev/null +++ b/conf/pipeline/scflow/imperial.config @@ -0,0 +1,18 @@ +// scflow/imperial specific profile config + +params { + // Config Params + config_profile_description = 'Imperial College London - HPC - nf-core/scFlow Profile -- provided by nf-core/configs.' 
+ config_profile_contact = 'Combiz Khozoie (c.khozoie@imperial.ac.uk)' + + // Analysis Resource Params + ctd_folder = "/rds/general/user/$USER/projects/ukdrmultiomicsproject/live/Analyses/scFlowResources/refs/ctd" + ensembl_mappings = "/rds/general/user/$USER/projects/ukdrmultiomicsproject/live/Analyses/scFlowResources/src/ensembl-ids/ensembl_mappings.tsv" +} + +singularity { + enabled = true + autoMounts = true + cacheDir = "/rds/general/user/$USER/projects/ukdrmultiomicsproject/live/.singularity-cache" + runOptions = "-B /rds/,/rdsgpfs/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp" +} \ No newline at end of file diff --git a/conf/seg_globe.config b/conf/seg_globe.config new file mode 100644 index 0000000..41a3d6e --- /dev/null +++ b/conf/seg_globe.config @@ -0,0 +1,27 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Section for Evolutionary Genomics @ GLOBE, University of Copenhagen - seg_globe profile provided by nf-core/configs.' + config_profile_contact = 'Aashild Vaagene (@ashildv)' + config_profile_url = 'https://globe.ku.dk/research/evogenomics/' + max_memory = 250.GB + max_cpus = 35 + max_time = 720.h +} + +singularity { + enabled = true + autoMounts = true + cacheDir = '/shared/volume/hologenomics/data/cache/nf-eager/singularity' +} + +process { + executor = 'slurm' + queue = { task.time < 24.h ? 'hologenomics-short' : task.time < 168.h ? 'hologenomics' : 'hologenomics-long' } +} + +cleanup = true + +executor { + queueSize = 8 +} + diff --git a/conf/shh.config b/conf/shh.config index 02c186b..ab3dcca 100644 --- a/conf/shh.config +++ b/conf/shh.config @@ -10,6 +10,7 @@ params { igenomes_base = "/projects1/public_data/igenomes/" } +// Perform work directory cleanup after a successful run cleanup = true singularity { @@ -20,7 +21,6 @@ singularity { process { executor = 'slurm' - queue = { task.memory > 756.GB || task.cpus > 64 ? 'supercruncher': task.time <= 2.h ? 
'short' : task.time <= 48.h ? 'medium': 'long' } } executor { @@ -39,9 +39,16 @@ profiles { } sdag { params { - config_profile_description = 'SDAG MPI-SHH profile, provided by nf-core/configs.' - max_memory = 2.TB - max_cpus = 128 + config_profile_description = 'SDAG MPI-SHH profile, provided by nf-core/configs.' + max_memory = 2.TB + max_cpus = 128 + } + process { + queue = { task.memory > 756.GB || task.cpus > 64 ? 'supercruncher': task.time <= 2.h ? 'short' : task.time <= 48.h ? 'medium': 'long' } } } + // Profile to deactivate automatic cleanup of work directory after a successful run. Overwrites cleanup option. + debug { + cleanup = false + } } diff --git a/docs/google.md b/docs/google.md index 9e8c521..d488b8a 100644 --- a/docs/google.md +++ b/docs/google.md @@ -14,21 +14,21 @@ nextflow run nf-core/rnaseq -profile test,google --google_bucket NB: You will need an account to use the HPC cluster CX1 in order to run the pipeline. If in doubt contact IT. +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. +>NB: To submit jobs to the Imperial College MEDBIO cluster, use `-profile imperial_mb` instead. diff --git a/docs/imperial_mb.md b/docs/imperial_mb.md new file mode 100644 index 0000000..d7f7f15 --- /dev/null +++ b/docs/imperial_mb.md @@ -0,0 +1,16 @@ +# nf-core/configs: Imperial MEDBIO HPC Configuration + +All nf-core pipelines have been successfully configured for use on the MEDBIO cluster at Imperial College London HPC. + +To use, run the pipeline with `-profile imperial_mb`. This will download and launch the [`imperial_mb.config`](../conf/imperial_mb.config) which has been pre-configured with a setup suitable for the MEDBIO cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. 
+ +Before running the pipeline you will need to load Nextflow using the environment module system on the head node. You can do this by issuing the commands below: + +```bash +## Load Nextflow and Singularity environment modules +module load Nextflow +``` + +>NB: You will need an account to use the HPC cluster MEDBIO in order to run the pipeline. Access to the MEDBIO queue is exclusive. If in doubt contact IT. +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. +>NB: To submit jobs to the standard CX1 cluster at Imperial College, use `-profile imperial` instead. diff --git a/docs/oist.md b/docs/oist.md new file mode 100644 index 0000000..8c4a68c --- /dev/null +++ b/docs/oist.md @@ -0,0 +1,33 @@ +# nf-core/configs: OIST Configuration + +The nf-core pipelines [rnaseq](https://nf-co.re/rnaseq) and +[eager](https://nf-co.re/eager) have been successfully tested on the _Deigo_ +cluster at the Okinawa Institute of Science and Technology Graduate University +([OIST](https://www.oist.jp)). We have no reason to expect that other +pipelines would not work. + +To use, run the pipeline with `-profile oist`. This will download and launch +the [`oist.config`](../conf/oist.config) which has been pre-configured with a +setup suitable for _Deigo_. Using this profile, a docker image containing all +of the required software will be downloaded, and converted to a Singularity +image before execution of the pipeline. + +## Below are non-mandatory information e.g. on modules to load etc + +Before running the pipeline you will need to load Nextflow and Singularity +using the environment module system on _Deigo_. 
You can do this by issuing the +commands below: + +```bash +## Load the latest Nextflow and Singularity environment modules +ml purge +ml bioinfo-ugrp-modules +ml Other/Nextflow +``` + +>NB: You will need an account to use the _Deigo_ cluster in order to run the +>pipeline. If in doubt contact IT. +> +>NB: Nextflow will submit the jobs via the SLURM scheduler to the HPC cluster +>and as such the commands above will have to be executed on one of the login +>nodes. If in doubt contact IT. diff --git a/docs/pipeline/scflow/imperial.md b/docs/pipeline/scflow/imperial.md new file mode 100644 index 0000000..be2cc59 --- /dev/null +++ b/docs/pipeline/scflow/imperial.md @@ -0,0 +1,21 @@ +# nf-core/configs: Imperial scflow Specific Configuration + +Extra specific configuration for the scflow pipeline + +## Usage + +To use, run the pipeline with `-profile imperial` or `-profile imperial_mb`. + +This will download and launch the scflow specific [`imperial.config`](../../../conf/pipeline/scflow/imperial.config) which has been pre-configured with a setup suitable for the Imperial HPC cluster. + +Example: `nextflow run nf-core/scflow -profile imperial` + +## scflow specific configurations for Imperial + +Specific configurations for Imperial have been made for scflow. + +* Singularity `enabled` and `autoMounts` set to `true` +* Singularity `cacheDir` path set to an RDS location +* Singularity `runOptions` path set to bind (`-B`) RDS paths with container paths. +* Params `ctd_folder` set to an RDS location. +* Params `ensembl_mappings` set to an RDS location. diff --git a/docs/seg_globe.md b/docs/seg_globe.md new file mode 100644 index 0000000..da03737 --- /dev/null +++ b/docs/seg_globe.md @@ -0,0 +1,21 @@ +# nf-core/configs: Section for Evolutionary Genomics at GLOBE, University of Copenhagen (hologenomics partition on HPC) Configuration + +> **NB:** You will need an account to use the HPC cluster to run the pipeline. If in doubt contact IT. 
+ +The profile is configured to run with Singularity version 3.6.3-1.el7 which is part of the OS installation and does not need to be loaded as a module. + +Before running the pipeline you will need to load Java, miniconda and Nextflow. You can do this by including the commands below in your SLURM/sbatch script: + +```bash +## Load Java and Nextflow environment modules +module purge +module load lib +module load java/v1.8.0_202-jdk miniconda nextflow/v20.07.1.5412 +``` + +All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory anyway. +The config contains a `cleanup` command that removes the `work/` directory automatically once the pipeline has completed successfully. If the run does not complete successfully then the `work/` dir should be removed manually to save storage space. + +This configuration will automatically choose the correct SLURM queue (`hologenomics-short`, `hologenomics` or `hologenomics-long`) depending on the time required by each process. + +> **NB:** Nextflow will need to submit the jobs via SLURM to the HPC cluster and as such the commands above will have to be submitted from one of the login nodes. diff --git a/docs/uppmax.md b/docs/uppmax.md index 0f23cd1..d9ac5ce 100644 --- a/docs/uppmax.md +++ b/docs/uppmax.md @@ -2,6 +2,10 @@ All nf-core pipelines have been successfully configured for use on the Swedish UPPMAX clusters. +## Getting help + +We have a Slack channel dedicated to UPPMAX users on the nf-core Slack: [https://nfcore.slack.com/channels/uppmax](https://nfcore.slack.com/channels/uppmax) + ## Using the UPPMAX config profile To use, run the pipeline with `-profile uppmax` (one hyphen). 
@@ -12,14 +16,19 @@ In addition to this config profile, you will also need to specify an UPPMAX proj You can do this with the `--project` flag (two hyphens) when launching nextflow. For example: ```bash -nextflow run nf-core/PIPELINE -profile uppmax --project SNIC 2018/1-234 # ..rest of pipeline flags +nextflow run nf-core/PIPELINE -profile uppmax --project snic2018-1-234 # ..rest of pipeline flags ``` +> NB: If you're not sure what your UPPMAX project ID is, try running `groups` or checking SUPR. + Before running the pipeline you will need to either install Nextflow or load it using the environment module system. -This config enables Nextflow to manage the pipeline jobs via the Slurm job scheduler. +This config enables Nextflow to manage the pipeline jobs via the Slurm job scheduler and using Singularity for software management. + Just run Nextflow on a login node and it will handle everything else. +Remember to use `-bg` to launch Nextflow in the background, so that the pipeline doesn't exit if you leave your terminal session. + ## Using iGenomes references A local copy of the iGenomes resource has been made available on all UPPMAX clusters so you should be able to run the pipeline against any reference available in the `igenomes.config`. @@ -40,7 +49,7 @@ Note that each job will still start with the same request as normal, but restart All jobs will be submitted to fat nodes using this method, so it's only for use in extreme circumstances. -## How to specify a UPPMAX cluster +## Different UPPMAX clusters The UPPMAX nf-core configuration profile uses the `hostname` of the active environment to automatically apply the following resource limits: @@ -64,3 +73,15 @@ All jobs are limited to 1 hour to be eligible for this queue and only one job al It is not suitable for use with real data. To use it, submit with `-profile uppmax,devel`. 
+ +## Running on Bianca + +For security reasons, there is no internet access on Bianca so you can't download from or upload files to the cluster directly. Before running a nf-core pipeline on Bianca you will first have to download the pipeline and singularity images needed elsewhere and transfer them via the wharf area to your Bianca project. + +You can follow the guide for downloading pipelines [for offline use](https://nf-co.re/tools#downloading-pipelines-for-offline-use). Note that you will have to download the singularity images as well. + +Next transfer the pipeline and the singularity images to your project. Before running the pipeline you will have to indicate to nextflow where the singularity images are located by setting `NXF_SINGULARITY_CACHEDIR` : + +`export NXF_SINGULARITY_CACHEDIR=Your_Location_For_The_Singularity_directory/.` + +You should now be able to run your nf-core pipeline on bianca. diff --git a/nfcore_custom.config b/nfcore_custom.config index 2b3c419..429e3c1 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -24,6 +24,8 @@ profiles { czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" } + imperial { includeConfig "${params.custom_config_base}/conf/imperial.config" } + imperial_mb { includeConfig "${params.custom_config_base}/conf/imperial_mb.config" } genotoul { includeConfig "${params.custom_config_base}/conf/genotoul.config" } google { includeConfig "${params.custom_config_base}/conf/google.config" } denbi_qbic { includeConfig "${params.custom_config_base}/conf/denbi_qbic.config" } @@ -33,9 +35,11 @@ profiles { kraken { includeConfig "${params.custom_config_base}/conf/kraken.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/mpcdf.config" } munin { includeConfig "${params.custom_config_base}/conf/munin.config" } + oist { 
includeConfig "${params.custom_config_base}/conf/oist.config" } pasteur { includeConfig "${params.custom_config_base}/conf/pasteur.config" } phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" } prince { includeConfig "${params.custom_config_base}/conf/prince.config" } + seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"} shh { includeConfig "${params.custom_config_base}/conf/shh.config" } uct_hpc { includeConfig "${params.custom_config_base}/conf/uct_hpc.config" } uppmax { includeConfig "${params.custom_config_base}/conf/uppmax.config" } @@ -54,6 +58,8 @@ params { cfc: ['.hpc.uni-tuebingen.de'], crick: ['.thecrick.org'], icr_davros: ['.davros.compute.estate'], + imperial: ['.hpc.ic.ac.uk'], + imperial_mb: ['.hpc.ic.ac.uk'], genotoul: ['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest: ['.genouest.org'], uppmax: ['.uppmax.uu.se'], diff --git a/pipeline/scflow.config b/pipeline/scflow.config new file mode 100644 index 0000000..8c4697f --- /dev/null +++ b/pipeline/scflow.config @@ -0,0 +1,14 @@ +/* + * ------------------------------------------------- + * nfcore/scflow custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/scflow folder and imported + * under a profile name here. + */ + +profiles { + imperial { includeConfig "${params.custom_config_base}/conf/pipeline/scflow/imperial.config" } + imperial_mb { includeConfig "${params.custom_config_base}/conf/pipeline/scflow/imperial.config" } // intended +} \ No newline at end of file