From a9b62c53e0bece4fb578ff1168f72e820c3be6b9 Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Tue, 13 Sep 2022 17:02:44 +0100 Subject: [PATCH 1/6] Added global institutional and pipeline configs for sbc_sharc --- .github/workflows/main.yml | 1 + README.md | 1 + conf/pipeline/atacseq/sbc_sharc.config | 74 ++++++ conf/pipeline/chipseq/sbc_sharc.config | 74 ++++++ conf/pipeline/rnaseq/sbc_sharc.config | 79 ++++++ conf/pipeline/sarek/sbc_sharc.config | 114 ++++++++ conf/sbc_sharc.config | 59 ++++ docs/sbc_sharc.md | 355 +++++++++++++++++++++++++ nfcore_custom.config | 1 + pipeline/atacseq.config | 13 + pipeline/chipseq.config | 13 + pipeline/rnaseq.config | 1 + pipeline/sarek.config | 1 + 13 files changed, 786 insertions(+) create mode 100644 conf/pipeline/atacseq/sbc_sharc.config create mode 100644 conf/pipeline/chipseq/sbc_sharc.config create mode 100644 conf/pipeline/rnaseq/sbc_sharc.config create mode 100644 conf/pipeline/sarek/sbc_sharc.config create mode 100644 conf/sbc_sharc.config create mode 100644 docs/sbc_sharc.md create mode 100644 pipeline/atacseq.config create mode 100644 pipeline/chipseq.config diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 487c7ee..ca01a15 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -83,6 +83,7 @@ jobs: - "sage" - "sahmri" - "sanger" + - "sbc_sharc" - "seg_globe" - "uct_hpc" - "unibe_ibu" diff --git a/README.md b/README.md index 445467b..0cad49f 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ Currently documentation is available for the following systems: - [ROSALIND](docs/rosalind.md) - [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) +- [SBC_SHARC](docs/sbc_sharc.md) - [SEG_GLOBE](docs/seg_globe.md) - [UCT_HPC](docs/uct_hpc.md) - [UNIBE_IBU](docs/unibe_ibu.md) diff --git a/conf/pipeline/atacseq/sbc_sharc.config b/conf/pipeline/atacseq/sbc_sharc.config new file mode 100644 index 0000000..2e987d2 --- /dev/null +++ b/conf/pipeline/atacseq/sbc_sharc.config @@ -0,0 +1,74 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/atacseq + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config + +process { + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println 
" ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} diff --git a/conf/pipeline/chipseq/sbc_sharc.config b/conf/pipeline/chipseq/sbc_sharc.config new file mode 100644 index 0000000..2741453 --- /dev/null +++ b/conf/pipeline/chipseq/sbc_sharc.config @@ -0,0 +1,74 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/chipseq + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config + +process { + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/conf/pipeline/rnaseq/sbc_sharc.config b/conf/pipeline/rnaseq/sbc_sharc.config new file mode 100644 index 0000000..52bf0ff --- /dev/null +++ b/conf/pipeline/rnaseq/sbc_sharc.config @@ -0,0 +1,79 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/rnaseq + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config + +process { + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + + withLabel:process_high_memory { + memory = { check_max( 60.GB * task.attempt, 'memory' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} + diff --git a/conf/pipeline/sarek/sbc_sharc.config b/conf/pipeline/sarek/sbc_sharc.config new file mode 100644 index 0000000..204d73b --- /dev/null +++ b/conf/pipeline/sarek/sbc_sharc.config @@ -0,0 +1,114 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Custom Pipeline Resource Config for nf-core/sarek + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +// process-specific resource requirements - reduced specification from those in sarek/conf/base.config + +process { + + + // process labels + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + + withLabel:process_high_memory { + memory = { check_max( 60.GB * task.attempt, 'memory' ) } + } + + + // process name + + withName:'BWAMEM1_MEM|BWAMEM2_MEM' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withName:'FASTP' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + } + + withName:'FASTQC|FASTP|MOSDEPTH|SAMTOOLS_CONVERT' { + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + } + + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|SAMTOOLS_STATS' { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + } + + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|GATK4_GATHERBQSRREPORTS' { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + withName:'GATK4_MARKDUPLICATES' { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + withName:'FREEBAYES|SAMTOOLS_STATS|SAMTOOLS_INDEX|UNZIP' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/conf/sbc_sharc.config b/conf/sbc_sharc.config new file mode 100644 index 0000000..ca41185 --- /dev/null +++ b/conf/sbc_sharc.config @@ -0,0 +1,59 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Sheffield Bioinformatics Core Configuration Profile - ShARC + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Base Institutional Configuration + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +params { + + // nf-core specific parameters displayed in header summary of each run + + config_profile_description = 'Sheffield Bioinformatics Core - ShARC' + config_profile_contact = 'Lewis Quayle (l.quayle@sheffield.ac.uk)' + config_profile_url = 'https://docs.hpc.shef.ac.uk/en/latest/sharc/index.html' + + // hpc resource limits + + max_cpus = 16 + max_memory = 64.GB + max_time = 96.h + +} + + +// container engine + +singularity { + + enabled = true + autoMounts = true + // cacheDir = '////' + +} + + +// hpc configuration specific to ShARC + +process { + + // scheduler + + executor = 'sge' + penv = 'smp' + queue = { task.time <= 6.h ? 'shortint.q' : 'all.q' } + clusterOptions = { "-l rmem=${task.memory.toGiga()}G" } + + // error and retry handling + + errorStrategy = { task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish' } + maxRetries = 2 + +} + diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md new file mode 100644 index 0000000..7b055d1 --- /dev/null +++ b/docs/sbc_sharc.md @@ -0,0 +1,355 @@ +# nf-core/configs: Sheffield Bioinformatics Core Facility ShARC Configuration + +The following nf-core pipelines have been successfully configured for use on the the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html): + +- [atacseq](https://nf-co.re/atacseq) +- [chipseq](https://nf-co.re/chipseq) +- [rnaseq](https://nf-co.re/rnaseq) +- [sarek](https://nf-co.re/sarek) + +When using [`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines listed above, the appropriate configuration file from the list below will be loaded automatically: + +- atacseq: [sbc_sharc atacseq config](../conf/pipeline/atacseq/sbc_sharc.config) +- chipseq: [sbc_sharc chipseq config](../conf/pipeline/chipseq/sbc_sharc.config) +- rnaseq: [sbc_sharc rnaseq config](../conf/pipeline/rnaseq/sbc_sharc.config) +- sarek: [sbc_sharc sarek config](../conf/pipeline/sarek/sbc_sharc.config) + +The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. + + +## Using the SBC_ShARC Institutional Configuration Profile + +To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). This will download and launch [`sbc_sharc.config`](../conf/sbc_sharc.config) which has been pre-configured with a setup suitable for the ShARC cluster and will automatically load the appropriate pipeline-specific configuration file. + +For a full guide on how to setup and run Nextflow using nf-core pipelines on ShARC, see the **Running Nextflow with nf-core Pipelines on ShARC** section below. 
+ + +## A Note on Singularity Containers + +The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file supports running nf-core pipelines with Singularity containers; Singularity images will be downloaded automatically before execution of the pipeline. + +Please read the **Configure Singularity for use with Nextflow and nf-core** sub-section below. + + +## Running Nextflow with nf-core Pipelines on ShARC + +Nextflow is not currently available on ShARC as an environmental software module. The most simple solution to this issue is to install Nextflow and nf-core using a personal install of miniconda. This guide will describe the main steps, which are to: + +1. Install miniconda as a personal software module +2. Load and configure conda +3. Install Nextflow and nf-core within a conda environment +4. Configure Singularity for use with Nextflow and nf-core +5. Setup your project directory and configure your run +6. Submit your run to the SGE scheduler + + +### 1. Install Miniconda as a Personal Software Module + +Connect to ShARC via SSH and login to a worker node via an interactive session. + +```shell +# login +ssh -X username@sharc.shef.ac.uk + +# request a command line only interactive session - some extra resources prevent issues building conda env later +qrsh -l rmem=4G -pe smp 2 +``` + +Navigate your folder within the data area of the file store. + +```shell +cd /data/$USER +``` + +Download and run the miniconda installer by running the following series of commands: + +```shell +# download the latest installer file +wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh + +# check the hashes match +sha256sum Miniconda3-latest-Linux-x86_64.sh + +# make the file executable +chmod +x Miniconda3-latest-Linux-x86_64.sh + +# run the installer +bash Miniconda3-latest-Linux-x86_64.sh +``` + +The miniconda installer will now run and walk you through the install process. There are two **IMPORTANT** things you must take note of at this point: + +1. You will have to overwrite the default install path when prompted by the miniconda installer to check the install path - the directory to which the install is attempted should be `/data/$USER/miniconda`. + +``` +Miniconda3 will now be installed into this location: +///miniconda3 + + - Press ENTER to confirm the location + - Press CTRL-C to abort the installation + - Or specify a different location below + +[///miniconda3] >>> /data/$USER/miniconda +``` + +2. **DO NOT** initialize miniconda at the end of the install process when prompted as shown here: + +``` +Do you wish the installer to initialize Miniconda3 +by running conda init? [yes|no] +[yes] >>> no +``` + +Once the installer has run, delete the installation script. + +```shell +rm Miniconda3-latest-Linux-x86_64.sh +``` + +Now make a modules folder and module file. + +```shell +# modules folder +mkdir /home/$USER/modules + +# module file +nano /home/$USER/modules/miniconda +``` + +Paste the below into the nano editor that opens upon running the final command. Note that this file is in Tcl not BASh, so environmental variable handing is different from the normal `$USER` for username. + +``` +#%Module10.2##################################################################### +## +## User Data Directory Miniconda module file +## +################################################################################ + +proc ModulesHelp { } { + global version + + puts stderr "Makes a user's personal install of Miniconda available." 
+} + +module-whatis "Makes a user's personal install of Miniconda available." + +# module variables + +set MINICONDA_DIR /data/$env(USER)/miniconda/bin + +prepend-path PATH $MINICONDA_DIR +``` + +Now run the following line to make your personal modules available for loading whenever you login. + +```shell +echo "module use /home/$USER/modules" >> ~/.bashrc +``` + +The last thing to note here is that you should not load the anaconda environmental module available to all HPC users and the personal miniconda module you have just made at the same time. + +For further information on making software available via a custom module file visit: + +[Making software available via a custom module file](https://docs.hpc.shef.ac.uk/en/latest/referenceinfo/environment-modules/creating-custom-modulefiles.html) + + +## 2. Load and Configure Conda + +Run the following commands in order and follow any prompts as appropriate: + +```shell +# load the miniconda module - if not already loaded +module load miniconda + +# disable base environment auto-activation +conda config --set auto_activate_base false + +# add the bioconda and conda-forge channels to conda configuration +conda config --add channels bioconda +conda config --add channels conda-forge + +# set channel_priority to "strict" +conda config --set channel_priority strict + +# ensure conda is up-to-date +conda update conda +``` + + +## 3. Install Nextflow and nf-core within a Conda Environment + +Run the following commands in order and follow any prompts as appropriate: + +```shell +# make the "nf_env" environment (in /home/$USER/.conda/envs/nf_env) +conda create --name nf_env nextflow nf-core + +# activate the environment +source activate nf_env + +# ensure all packages are up-to-date +conda update --all +``` + +You can now test the install has worked by running the following: + +```shell +# test the environment is working +nextflow info + +# test functionality +nextflow run hello +``` + +When you are finished, you can deactivate your conda environment using the command `conda deactivate`. + +Although you should not yet do this, should you wish to unload your personal miniconda module you can do so by running `module unload miniconda`. + +Step 5. describes the process of running an nf-core pipeline using Nextflow. You do not have to have a conda environment active for this part of the process as it will be loaded as part of your submission script, but you should not unload the miniconda module at this point. + + +## 4. Configure Singularity for use with Nextflow and nf-core + +When you run nextflow for the first time, Singularity will create a hidden directory `.singularity` in your `$HOME` directory `/home/$USER` which has very very limited (10GB) space available. It is therefore a good idea to create a directory somewhere else (e.g., `/data/$USER`) with more room and link the locations. To do this, run the following series of commands: + +```shell +# change directory to $HOME +cd $HOME + +# make the directory that will be linked to +mkdir /data/$USER/.singularity + +# link the new directory with the existing one +ln -s /data/$USER/.singularity .singularity +``` + + +## 5. 
Setup your Project and Configure your Run + +Whichever file store you decide to locate your project root directory in, the assumed project sub-directory structure within this guide is as follows: + +``` +/filestore/$USER/ +│ +└── project_root/ + │ + ├── config + ├── params + ├── sample_sheet + └── script +``` + +There are three things you will require to run an nf-core pipeline: + +1. A sample sheet +2. A pipeline launcher parameter configuration file +3. A submission script + +You can find nf-core pipelines by visiting [https://nf-co.re/pipelines](https://nf-co.re/pipelines). Each pipeline page has more information on how to use the pipeline as well as a full description of sample sheet requirements and formatting. + +Your sample sheet should be located inside your `sample_sheet` sub-directory. + +The general launch command in the script template below assumes you have configured your specific run using an nf-core pipeline launcher. For example, the launcher for the nf-core/rnaseq pipeline that can be found [here](https://nf-co.re/launch?pipeline=rnaseq). The parameters specified for your run using the launcher should be saved in a file named `nf-params.json` within the `params` sub-directory of your project root. + +To create your run script, navigate to the `script` sub-directory and run the following: + +```shell +nano nf_submission.sh +``` + +Paste the below into the editor ensuring to change the generic information for your own where indicated in the comment lines: + +```shell +#!/bin/bash + +## SGE scheduler flags + +# job name >>> edit "pipeline_name" for the name of the pipeline you are running e.g. rnaseq <<< +#$ -N nf-pipeline_name + +# specify queue and project for the nextflow driver job >>> keep and edit if using a priority queue else delete both <<< +#$ -q queue_name.q +#$ -P queue_name + +# request resources for the nextflow driver job +#$ -pe smp 1 +#$ -l rmem=2G + +# export environmental variables in current shell environment to job +#$ -V + +# send email >>> edit "username" <<< +#$ -M username@sheffield.ac.uk +#$ -m beas + +# merge standard error stream into the standard output stream +#$ -j y + +# output log file +#$ -o nextflow.log + + +## load miniconda module and activate analysis environment + +module load miniconda +source activate nf_env + + +## define and export variables + +# prevent java vm requesting too much memory and killing run +export NXF_OPTS="-Xms1g -Xmx2g" + +# path to singularity cache +export NXF_SINGULARITY_CACHEDIR="/home/$USER/.singularity" + +# project name >>> edit "project_name" so that it is the name of your project root directory <<< +PROJECT="project_name" + +# project directories >>> edit the name of the "filestore" e.g. fastdata <<< +PARAM_DIR="/filestore/$USER/$PROJECT/params" +CONFIG_DIR="/filestore/$USER/$PROJECT/config" + + +## run command >>> edit "pipeline" and "version" <<< + +nextflow run nf-core/pipeline \ +-r version \ +-profile sbc_sharc \ +-resume \ +-params-file ${PARAM_DIR}/nf-params.json + +``` + +Now save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X". + +**OPTIONAL:** If you have specified a priority access queue in your submission script, you will need a personal configuration to send your jobs and not just your driver script to the appropriate queue. 
Navigate to the `config` sub-directory of your project folder and run the following: + +```shell +nano personal.config +``` + +Then paste the following into the editor, ensuring you enter the correct queue name: + +``` +process { + queue = 'queue-name.q' + clusterOptions = { "-P queue-name -l rmem=${task.memory.toGiga()}G" } +} +``` + +Save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X". + +Now append `-c ${CONFIG_DIR}/personal.config` to the `nextflow run` command on a new line in your submission script. + + +## 6. Submit your Run to the SGE Scheduler + +Once you have fulfilled all of the requirements above, you should be ready to submit your batch job to the SGE scheduler on ShARC. From the project root, type the following: + +```bash +qsub ./scripts/nf_submission.sh +``` + +Your pipeline run should start momentarily. Good Luck! + diff --git a/nfcore_custom.config b/nfcore_custom.config index fae764b..abf163b 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -65,6 +65,7 @@ profiles { sage { includeConfig "${params.custom_config_base}/conf/sage.config" } sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" } sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} + sbc_sharc { includeConfig "${params.custom_config_base}/conf/sbc_sharc.config"} seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"} uct_hpc { includeConfig "${params.custom_config_base}/conf/uct_hpc.config" } unibe_ibu { includeConfig "${params.custom_config_base}/conf/unibe_ibu.config" } diff --git a/pipeline/atacseq.config b/pipeline/atacseq.config new file mode 100644 index 0000000..f205f62 --- /dev/null +++ b/pipeline/atacseq.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/atacseq custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/atacseq folder and imported + * under a profile name here. + */ + +profiles { + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/atacseq/sbc_sharc.config" } +} diff --git a/pipeline/chipseq.config b/pipeline/chipseq.config new file mode 100644 index 0000000..242aa92 --- /dev/null +++ b/pipeline/chipseq.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/chipseq custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/chipseq folder and imported + * under a profile name here. 
+ */ + +profiles { + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/chipseq/sbc_sharc.config" } +} diff --git a/pipeline/rnaseq.config b/pipeline/rnaseq.config index 0486d86..b1d470f 100644 --- a/pipeline/rnaseq.config +++ b/pipeline/rnaseq.config @@ -11,5 +11,6 @@ profiles { eddie { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/eddie.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/mpcdf.config" } + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/sbc_sharc.config" } utd_sysbio { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/utd_sysbio.config" } } diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 12676b2..3c087aa 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,5 +15,6 @@ profiles { eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" } + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/sbc_sharc.config" } uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" } } From 9fef35d7d3b4153eeade3af17fc08769bab000fb Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Tue, 13 Sep 2022 17:08:56 +0100 Subject: [PATCH 2/6] Updated pipeline configs for atacseq and chipseq --- conf/pipeline/atacseq/sbc_sharc.config | 2 +- conf/pipeline/chipseq/sbc_sharc.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/atacseq/sbc_sharc.config b/conf/pipeline/atacseq/sbc_sharc.config index 2e987d2..e50695c 100644 --- a/conf/pipeline/atacseq/sbc_sharc.config +++ b/conf/pipeline/atacseq/sbc_sharc.config @@ -11,7 +11,7 @@ */ -// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config +// process-specific resource requirements - reduced specification from those in atacseq/conf/base.config process { diff --git a/conf/pipeline/chipseq/sbc_sharc.config b/conf/pipeline/chipseq/sbc_sharc.config index 2741453..60912f3 100644 --- a/conf/pipeline/chipseq/sbc_sharc.config +++ b/conf/pipeline/chipseq/sbc_sharc.config @@ -11,7 +11,7 @@ */ -// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config +// process-specific resource requirements - reduced specification from those in chipseq/conf/base.config process { From 61ae4636061d4f1296f363cc0929fb5e301bc5c7 Mon Sep 17 00:00:00 2001 From: Lewis Quayle <90088916+lquayle88@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:13:57 +0100 Subject: [PATCH 3/6] Updated sbc_sharc.md --- docs/sbc_sharc.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md index 7b055d1..7d5bcd3 100644 --- a/docs/sbc_sharc.md +++ b/docs/sbc_sharc.md @@ -2,17 +2,17 @@ The following nf-core pipelines have been successfully configured for use on the the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html): -- [atacseq](https://nf-co.re/atacseq) -- [chipseq](https://nf-co.re/chipseq) -- [rnaseq](https://nf-co.re/rnaseq) -- [sarek](https://nf-co.re/sarek) +- [nf-co.re/atacseq](https://nf-co.re/atacseq) +- [nf-co.re/chipseq](https://nf-co.re/chipseq) +- [nf-co.re/rnaseq](https://nf-co.re/rnaseq) +- [nf-co.re/sarek](https://nf-co.re/sarek) When using 
[`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines listed above, the appropriate configuration file from the list below will be loaded automatically: -- atacseq: [sbc_sharc atacseq config](../conf/pipeline/atacseq/sbc_sharc.config) -- chipseq: [sbc_sharc chipseq config](../conf/pipeline/chipseq/sbc_sharc.config) -- rnaseq: [sbc_sharc rnaseq config](../conf/pipeline/rnaseq/sbc_sharc.config) -- sarek: [sbc_sharc sarek config](../conf/pipeline/sarek/sbc_sharc.config) +- [atacseq sbc_sharc.config](../conf/pipeline/atacseq/sbc_sharc.config) +- [chipseq sbc_sharc.config](../conf/pipeline/chipseq/sbc_sharc.config) +- [rnaseq sbc_sharc.config](../conf/pipeline/rnaseq/sbc_sharc.config) +- [sarek sbc_sharc.config](../conf/pipeline/sarek/sbc_sharc.config) The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. From 91bfa7341f6a79e36eb8610350230ab2c92d4cef Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Wed, 14 Sep 2022 08:42:40 +0100 Subject: [PATCH 4/6] Made changes requested for PR --- README.md | 7 + conf/sbc_sharc.config | 2 - docs/pipeline/atacseq/sbc_sharc.md | 11 + docs/pipeline/chipseq/sbc_sharc.md | 11 + docs/pipeline/rnaseq/sbc_sharc.md | 11 + docs/pipeline/sarek/sbc_sharc.md | 11 + docs/sbc_sharc.md | 326 +---------------------------- 7 files changed, 58 insertions(+), 321 deletions(-) create mode 100644 docs/pipeline/atacseq/sbc_sharc.md create mode 100644 docs/pipeline/chipseq/sbc_sharc.md create mode 100644 docs/pipeline/rnaseq/sbc_sharc.md create mode 100644 docs/pipeline/sarek/sbc_sharc.md diff --git a/README.md b/README.md index 0cad49f..4544b05 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,10 @@ Currently documentation is available for the following pipelines within specific - ampliseq - [BINAC](docs/pipeline/ampliseq/binac.md) - [UPPMAX](docs/pipeline/ampliseq/uppmax.md) +- atacseq + - [SBC_SHARC](docs/pipeline/atacseq/sbc_sharc.md) +- chipseq + - [SBC_SHARC](docs/pipeline/chipseq/sbc_sharc.md) - eager - [EVA](docs/pipeline/eager/eva.md) - mag @@ -205,11 +209,14 @@ Currently documentation is available for the following pipelines within specific - rnafusion - [HASTA](docs/pipeline/rnafusion/hasta.md) - [MUNIN](docs/pipeline/rnafusion/munin.md) +- rnaseq + - [SBC_SHARC](docs/pipeline/rnaseq/sbc_sharc.md) - rnavar - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek - [Cancer Research UK Manchester Institute](docs/pipeline/sarek/crukmi.md) - [MUNIN](docs/pipeline/sarek/munin.md) + - [SBC_SHARC](docs/pipeline/sarek/sbc_sharc.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler - [EVA](docs/pipeline/taxprofiler/eva.md) diff --git a/conf/sbc_sharc.config b/conf/sbc_sharc.config index ca41185..20b8661 100644 --- a/conf/sbc_sharc.config +++ b/conf/sbc_sharc.config @@ -34,7 +34,6 @@ singularity { enabled = true autoMounts = true - // cacheDir = '////' } @@ -56,4 +55,3 @@ process { maxRetries = 2 } - diff --git a/docs/pipeline/atacseq/sbc_sharc.md b/docs/pipeline/atacseq/sbc_sharc.md new file mode 100644 index 0000000..f73d79b --- /dev/null +++ b/docs/pipeline/atacseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: ATAC-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/atacseq](https://nf-co.re/atacseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile 
sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/atacseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/atacseq -profile sbc_sharc` diff --git a/docs/pipeline/chipseq/sbc_sharc.md b/docs/pipeline/chipseq/sbc_sharc.md new file mode 100644 index 0000000..31baba1 --- /dev/null +++ b/docs/pipeline/chipseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: ChIP-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/chipseq](https://nf-co.re/chipseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/chipseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/chipseq -profile sbc_sharc` diff --git a/docs/pipeline/rnaseq/sbc_sharc.md b/docs/pipeline/rnaseq/sbc_sharc.md new file mode 100644 index 0000000..562f84d --- /dev/null +++ b/docs/pipeline/rnaseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: RNA-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/rnaseq](https://nf-co.re/rnaseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/rnaseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/rnaseq -profile sbc_sharc` diff --git a/docs/pipeline/sarek/sbc_sharc.md b/docs/pipeline/sarek/sbc_sharc.md new file mode 100644 index 0000000..361be18 --- /dev/null +++ b/docs/pipeline/sarek/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: Sarek Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/sarek](https://nf-co.re/sarek) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/sarek/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. 
+ +Example: `nextflow run nf-core/sarek -profile sbc_sharc` diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md index 7d5bcd3..a40b7ea 100644 --- a/docs/sbc_sharc.md +++ b/docs/sbc_sharc.md @@ -1,5 +1,12 @@ # nf-core/configs: Sheffield Bioinformatics Core Facility ShARC Configuration + +## Using the SBC_ShARC Institutional Configuration Profile + +To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch [`sbc_sharc.config`](../conf/sbc_sharc.config) which has been pre-configured with a setup suitable for the ShARC cluster and will automatically load the appropriate pipeline-specific configuration file. + The following nf-core pipelines have been successfully configured for use on the the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html): - [nf-co.re/atacseq](https://nf-co.re/atacseq) @@ -17,199 +24,10 @@ When using [`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines lis The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. -## Using the SBC_ShARC Institutional Configuration Profile - -To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). This will download and launch [`sbc_sharc.config`](../conf/sbc_sharc.config) which has been pre-configured with a setup suitable for the ShARC cluster and will automatically load the appropriate pipeline-specific configuration file. - -For a full guide on how to setup and run Nextflow using nf-core pipelines on ShARC, see the **Running Nextflow with nf-core Pipelines on ShARC** section below. - - ## A Note on Singularity Containers The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file supports running nf-core pipelines with Singularity containers; Singularity images will be downloaded automatically before execution of the pipeline. -Please read the **Configure Singularity for use with Nextflow and nf-core** sub-section below. - - -## Running Nextflow with nf-core Pipelines on ShARC - -Nextflow is not currently available on ShARC as an environmental software module. The most simple solution to this issue is to install Nextflow and nf-core using a personal install of miniconda. This guide will describe the main steps, which are to: - -1. Install miniconda as a personal software module -2. Load and configure conda -3. Install Nextflow and nf-core within a conda environment -4. Configure Singularity for use with Nextflow and nf-core -5. Setup your project directory and configure your run -6. Submit your run to the SGE scheduler - - -### 1. Install Miniconda as a Personal Software Module - -Connect to ShARC via SSH and login to a worker node via an interactive session. - -```shell -# login -ssh -X username@sharc.shef.ac.uk - -# request a command line only interactive session - some extra resources prevent issues building conda env later -qrsh -l rmem=4G -pe smp 2 -``` - -Navigate your folder within the data area of the file store. 
- -```shell -cd /data/$USER -``` - -Download and run the miniconda installer by running the following series of commands: - -```shell -# download the latest installer file -wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh - -# check the hashes match -sha256sum Miniconda3-latest-Linux-x86_64.sh - -# make the file executable -chmod +x Miniconda3-latest-Linux-x86_64.sh - -# run the installer -bash Miniconda3-latest-Linux-x86_64.sh -``` - -The miniconda installer will now run and walk you through the install process. There are two **IMPORTANT** things you must take note of at this point: - -1. You will have to overwrite the default install path when prompted by the miniconda installer to check the install path - the directory to which the install is attempted should be `/data/$USER/miniconda`. - -``` -Miniconda3 will now be installed into this location: -///miniconda3 - - - Press ENTER to confirm the location - - Press CTRL-C to abort the installation - - Or specify a different location below - -[///miniconda3] >>> /data/$USER/miniconda -``` - -2. **DO NOT** initialize miniconda at the end of the install process when prompted as shown here: - -``` -Do you wish the installer to initialize Miniconda3 -by running conda init? [yes|no] -[yes] >>> no -``` - -Once the installer has run, delete the installation script. - -```shell -rm Miniconda3-latest-Linux-x86_64.sh -``` - -Now make a modules folder and module file. - -```shell -# modules folder -mkdir /home/$USER/modules - -# module file -nano /home/$USER/modules/miniconda -``` - -Paste the below into the nano editor that opens upon running the final command. Note that this file is in Tcl not BASh, so environmental variable handing is different from the normal `$USER` for username. - -``` -#%Module10.2##################################################################### -## -## User Data Directory Miniconda module file -## -################################################################################ - -proc ModulesHelp { } { - global version - - puts stderr "Makes a user's personal install of Miniconda available." -} - -module-whatis "Makes a user's personal install of Miniconda available." - -# module variables - -set MINICONDA_DIR /data/$env(USER)/miniconda/bin - -prepend-path PATH $MINICONDA_DIR -``` - -Now run the following line to make your personal modules available for loading whenever you login. - -```shell -echo "module use /home/$USER/modules" >> ~/.bashrc -``` - -The last thing to note here is that you should not load the anaconda environmental module available to all HPC users and the personal miniconda module you have just made at the same time. - -For further information on making software available via a custom module file visit: - -[Making software available via a custom module file](https://docs.hpc.shef.ac.uk/en/latest/referenceinfo/environment-modules/creating-custom-modulefiles.html) - - -## 2. Load and Configure Conda - -Run the following commands in order and follow any prompts as appropriate: - -```shell -# load the miniconda module - if not already loaded -module load miniconda - -# disable base environment auto-activation -conda config --set auto_activate_base false - -# add the bioconda and conda-forge channels to conda configuration -conda config --add channels bioconda -conda config --add channels conda-forge - -# set channel_priority to "strict" -conda config --set channel_priority strict - -# ensure conda is up-to-date -conda update conda -``` - - -## 3. 
Install Nextflow and nf-core within a Conda Environment - -Run the following commands in order and follow any prompts as appropriate: - -```shell -# make the "nf_env" environment (in /home/$USER/.conda/envs/nf_env) -conda create --name nf_env nextflow nf-core - -# activate the environment -source activate nf_env - -# ensure all packages are up-to-date -conda update --all -``` - -You can now test the install has worked by running the following: - -```shell -# test the environment is working -nextflow info - -# test functionality -nextflow run hello -``` - -When you are finished, you can deactivate your conda environment using the command `conda deactivate`. - -Although you should not yet do this, should you wish to unload your personal miniconda module you can do so by running `module unload miniconda`. - -Step 5. describes the process of running an nf-core pipeline using Nextflow. You do not have to have a conda environment active for this part of the process as it will be loaded as part of your submission script, but you should not unload the miniconda module at this point. - - -## 4. Configure Singularity for use with Nextflow and nf-core - When you run nextflow for the first time, Singularity will create a hidden directory `.singularity` in your `$HOME` directory `/home/$USER` which has very very limited (10GB) space available. It is therefore a good idea to create a directory somewhere else (e.g., `/data/$USER`) with more room and link the locations. To do this, run the following series of commands: ```shell @@ -223,133 +41,3 @@ mkdir /data/$USER/.singularity ln -s /data/$USER/.singularity .singularity ``` - -## 5. Setup your Project and Configure your Run - -Whichever file store you decide to locate your project root directory in, the assumed project sub-directory structure within this guide is as follows: - -``` -/filestore/$USER/ -│ -└── project_root/ - │ - ├── config - ├── params - ├── sample_sheet - └── script -``` - -There are three things you will require to run an nf-core pipeline: - -1. A sample sheet -2. A pipeline launcher parameter configuration file -3. A submission script - -You can find nf-core pipelines by visiting [https://nf-co.re/pipelines](https://nf-co.re/pipelines). Each pipeline page has more information on how to use the pipeline as well as a full description of sample sheet requirements and formatting. - -Your sample sheet should be located inside your `sample_sheet` sub-directory. - -The general launch command in the script template below assumes you have configured your specific run using an nf-core pipeline launcher. For example, the launcher for the nf-core/rnaseq pipeline that can be found [here](https://nf-co.re/launch?pipeline=rnaseq). The parameters specified for your run using the launcher should be saved in a file named `nf-params.json` within the `params` sub-directory of your project root. - -To create your run script, navigate to the `script` sub-directory and run the following: - -```shell -nano nf_submission.sh -``` - -Paste the below into the editor ensuring to change the generic information for your own where indicated in the comment lines: - -```shell -#!/bin/bash - -## SGE scheduler flags - -# job name >>> edit "pipeline_name" for the name of the pipeline you are running e.g. 
rnaseq <<< -#$ -N nf-pipeline_name - -# specify queue and project for the nextflow driver job >>> keep and edit if using a priority queue else delete both <<< -#$ -q queue_name.q -#$ -P queue_name - -# request resources for the nextflow driver job -#$ -pe smp 1 -#$ -l rmem=2G - -# export environmental variables in current shell environment to job -#$ -V - -# send email >>> edit "username" <<< -#$ -M username@sheffield.ac.uk -#$ -m beas - -# merge standard error stream into the standard output stream -#$ -j y - -# output log file -#$ -o nextflow.log - - -## load miniconda module and activate analysis environment - -module load miniconda -source activate nf_env - - -## define and export variables - -# prevent java vm requesting too much memory and killing run -export NXF_OPTS="-Xms1g -Xmx2g" - -# path to singularity cache -export NXF_SINGULARITY_CACHEDIR="/home/$USER/.singularity" - -# project name >>> edit "project_name" so that it is the name of your project root directory <<< -PROJECT="project_name" - -# project directories >>> edit the name of the "filestore" e.g. fastdata <<< -PARAM_DIR="/filestore/$USER/$PROJECT/params" -CONFIG_DIR="/filestore/$USER/$PROJECT/config" - - -## run command >>> edit "pipeline" and "version" <<< - -nextflow run nf-core/pipeline \ --r version \ --profile sbc_sharc \ --resume \ --params-file ${PARAM_DIR}/nf-params.json - -``` - -Now save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X". - -**OPTIONAL:** If you have specified a priority access queue in your submission script, you will need a personal configuration to send your jobs and not just your driver script to the appropriate queue. Navigate to the `config` sub-directory of your project folder and run the following: - -```shell -nano personal.config -``` - -Then paste the following into the editor, ensuring you enter the correct queue name: - -``` -process { - queue = 'queue-name.q' - clusterOptions = { "-P queue-name -l rmem=${task.memory.toGiga()}G" } -} -``` - -Save and exit by typing "Ctrl + O" then "Return" then "Ctrl + X". - -Now append `-c ${CONFIG_DIR}/personal.config` to the `nextflow run` command on a new line in your submission script. - - -## 6. Submit your Run to the SGE Scheduler - -Once you have fulfilled all of the requirements above, you should be ready to submit your batch job to the SGE scheduler on ShARC. From the project root, type the following: - -```bash -qsub ./scripts/nf_submission.sh -``` - -Your pipeline run should start momentarily. Good Luck! - From 32403b6222676adf4fd0b5d8b7f7d27478d028d2 Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Wed, 14 Sep 2022 08:46:11 +0100 Subject: [PATCH 5/6] Updated pipeline config docs for atacseq, chipseq and rnaseq --- docs/pipeline/atacseq/sbc_sharc.md | 2 +- docs/pipeline/chipseq/sbc_sharc.md | 2 +- docs/pipeline/rnaseq/sbc_sharc.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/pipeline/atacseq/sbc_sharc.md b/docs/pipeline/atacseq/sbc_sharc.md index f73d79b..1f33453 100644 --- a/docs/pipeline/atacseq/sbc_sharc.md +++ b/docs/pipeline/atacseq/sbc_sharc.md @@ -6,6 +6,6 @@ Specific configuration for [nf-co.re/atacseq](https://nf-co.re/atacseq) pipeline To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). 
-This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/atacseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. +This will download and launch the atacseq specific [`sbc_sharc.config`](../../../conf/pipeline/atacseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. Example: `nextflow run nf-core/atacseq -profile sbc_sharc` diff --git a/docs/pipeline/chipseq/sbc_sharc.md b/docs/pipeline/chipseq/sbc_sharc.md index 31baba1..4280db9 100644 --- a/docs/pipeline/chipseq/sbc_sharc.md +++ b/docs/pipeline/chipseq/sbc_sharc.md @@ -6,6 +6,6 @@ Specific configuration for [nf-co.re/chipseq](https://nf-co.re/chipseq) pipeline To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). -This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/chipseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. +This will download and launch the chipseq specific [`sbc_sharc.config`](../../../conf/pipeline/chipseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. Example: `nextflow run nf-core/chipseq -profile sbc_sharc` diff --git a/docs/pipeline/rnaseq/sbc_sharc.md b/docs/pipeline/rnaseq/sbc_sharc.md index 562f84d..d62fe25 100644 --- a/docs/pipeline/rnaseq/sbc_sharc.md +++ b/docs/pipeline/rnaseq/sbc_sharc.md @@ -6,6 +6,6 @@ Specific configuration for [nf-co.re/rnaseq](https://nf-co.re/rnaseq) pipeline To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). -This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/rnaseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. +This will download and launch the rnaseq specific [`sbc_sharc.config`](../../../conf/pipeline/rnaseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. 
Example: `nextflow run nf-core/rnaseq -profile sbc_sharc` From be356eb400c8dce34c1638d5754b44ad7d167efa Mon Sep 17 00:00:00 2001 From: lquayle88 Date: Wed, 14 Sep 2022 10:24:56 +0100 Subject: [PATCH 6/6] Ran prettier on docs/sbc_sharc.md --- docs/sbc_sharc.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md index a40b7ea..f82b348 100644 --- a/docs/sbc_sharc.md +++ b/docs/sbc_sharc.md @@ -1,6 +1,5 @@ # nf-core/configs: Sheffield Bioinformatics Core Facility ShARC Configuration - ## Using the SBC_ShARC Institutional Configuration Profile To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). @@ -23,7 +22,6 @@ When using [`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines lis The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. - ## A Note on Singularity Containers The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file supports running nf-core pipelines with Singularity containers; Singularity images will be downloaded automatically before execution of the pipeline. @@ -40,4 +38,3 @@ mkdir /data/$USER/.singularity # link the new directory with the existing one ln -s /data/$USER/.singularity .singularity ``` -
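
A quick way to sanity-check the Singularity setup described above (a sketch, assuming the default paths used throughout this guide):

```shell
# confirm the link points at the roomier data area
ls -ld $HOME/.singularity

# optionally point Nextflow's image cache at the same location, consistent with the submission script shown earlier in the series
export NXF_SINGULARITY_CACHEDIR="$HOME/.singularity"
```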