From 2b58a62a9b057b2c0f23a57d5f660da23e234e77 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Fri, 11 Jun 2021 14:50:58 -0500 Subject: [PATCH 01/14] feat(utd): Add initial sysbio config --- conf/utd_sysbio.config | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 conf/utd_sysbio.config diff --git a/conf/utd_sysbio.config b/conf/utd_sysbio.config new file mode 100644 index 0000000..654ac09 --- /dev/null +++ b/conf/utd_sysbio.config @@ -0,0 +1,38 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'University of Texas at Dallas HPC cluster profile provided by nf-core/configs' + config_profile_contact = 'Edmund Miller(@emiller88)' + config_profile_url = 'http://docs.oithpc.utdallas.edu/' + singularity_cache_dir = '/scratch/applied-genomics/singularity' +} + +env { + TMPDIR = '/home/$USER/scratch/tmp' +} + +singularity { + enabled = true + envWhitelist='SINGULARITY_BINDPATH' + autoMounts = true + cacheDir = params.singularity_cache_dir +} + +process { + beforeScript = 'module load singularity/3.4.1' + executor = 'slurm' + queue = { task.memory >= 30.GB && task.cpu <= 16 ? 
'normal': 'smallmem' } + + + withLabel:process_high { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 62.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } +} + +params { + igenomes_base = '/scratch/applied-genomics/references/iGenomes/references/' + max_memory = 90.GB + max_cpus = 16 + max_time = 96.h +} \ No newline at end of file From 04d0b27e816ed144447906e8be7d76422f5800d2 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 16 Jun 2021 21:17:40 -0500 Subject: [PATCH 02/14] fix(sysbio): Reduce memory on high processes --- conf/utd_sysbio.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/utd_sysbio.config b/conf/utd_sysbio.config index 654ac09..a6d6ae0 100644 --- a/conf/utd_sysbio.config +++ b/conf/utd_sysbio.config @@ -25,7 +25,7 @@ process { withLabel:process_high { cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 62.GB * task.attempt, 'memory' ) } + memory = { check_max( 60.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } } From b11f137901bef7db3529980798e14f9456e07b4e Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 16 Jun 2021 21:18:28 -0500 Subject: [PATCH 03/14] fix(sysbio): Leave igenomes unfinished --- conf/utd_sysbio.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/utd_sysbio.config b/conf/utd_sysbio.config index a6d6ae0..41c9318 100644 --- a/conf/utd_sysbio.config +++ b/conf/utd_sysbio.config @@ -31,7 +31,8 @@ process { } params { - igenomes_base = '/scratch/applied-genomics/references/iGenomes/references/' + // TODO Need to initialize this + // igenomes_base = '/scratch/applied-genomics/references/iGenomes/references/' max_memory = 90.GB max_cpus = 16 max_time = 96.h From 472082254a04f3916a442b10fe1ab401fd3049cc Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 16 Jun 2021 21:18:48 -0500 Subject: [PATCH 04/14] fix(sysbio): Add Star resources --- 
conf/utd_sysbio.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/utd_sysbio.config b/conf/utd_sysbio.config index 41c9318..bd0eda8 100644 --- a/conf/utd_sysbio.config +++ b/conf/utd_sysbio.config @@ -23,6 +23,10 @@ process { queue = { task.memory >= 30.GB && task.cpu <= 16 ? 'normal': 'smallmem' } + withName:STAR_ALIGN { + memory = 36.GB + } + withLabel:process_high { cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 60.GB * task.attempt, 'memory' ) } From 2a958f5ce3d5a26a3a2cd583a463a020f072b29c Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 16 Jun 2021 21:19:38 -0500 Subject: [PATCH 05/14] docs: Add sysbio --- README.md | 1 + docs/utd_sysbio.config | 18 ++++++++++++++++++ nfcore_custom.config | 2 ++ 3 files changed, 21 insertions(+) create mode 100644 docs/utd_sysbio.config diff --git a/README.md b/README.md index 6daff2a..c272420 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ Currently documentation is available for the following systems: * [UCT_HPC](docs/uct_hpc.md) * [UPPMAX](docs/uppmax.md) * [UTD_GANYMEDE](docs/utd_ganymede.md) +* [UTD_SYSBIO](docs/utd_sysbio.md) * [UZH](docs/uzh.md) ### Uploading to `nf-core/configs` diff --git a/docs/utd_sysbio.config b/docs/utd_sysbio.config new file mode 100644 index 0000000..ff00de9 --- /dev/null +++ b/docs/utd_sysbio.config @@ -0,0 +1,18 @@ +# nf-core/configs: UTD Sysbio Configuration + +All nf-core pipelines have been successfully configured for use on the Sysbio HPC cluster at [The University of Texas at Dallas](https://www.utdallas.edu/). + +To use, run the pipeline with `-profile utd_sysbio`. This will download and launch the [`utd_sysbio.config`](../conf/utd_sysbio.config) which has been pre-configured with a setup suitable for the Sysbio HPC cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. 
+ +Before running the pipeline you will need to load Singularity using the environment module system on Sysbio. You can do this by issuing the commands below: + +```bash +## Singularity environment modules +module purge +module load singularity +``` + +All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory anyway. + +>NB: You will need an account to use the HPC cluster on Sysbio in order to run the pipeline. If in doubt contact OIT. +>NB: Nextflow will need to submit the jobs via SLURM to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact OIT. diff --git a/nfcore_custom.config b/nfcore_custom.config index 9b4fbda..dbb4638 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -49,6 +49,7 @@ profiles { uct_hpc { includeConfig "${params.custom_config_base}/conf/uct_hpc.config" } uppmax { includeConfig "${params.custom_config_base}/conf/uppmax.config" } utd_ganymede { includeConfig "${params.custom_config_base}/conf/utd_ganymede.config" } + utd_sysbio { includeConfig "${params.custom_config_base}/conf/utd_sysbio.config" } uzh { includeConfig "${params.custom_config_base}/conf/uzh.config" } jax { includeConfig "${params.custom_config_base}/conf/jax.config" } } @@ -70,5 +71,6 @@ params { genouest: ['.genouest.org'], uppmax: ['.uppmax.uu.se'], utd_ganymede: ['ganymede.utdallas.edu'] + utd_sysbio: ['sysbio.utdallas.edu'] ] } From 7ff1c6cc7d2494cfa5ecd7a79640fb625453c1f9 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Wed, 16 Jun 2021 21:19:49 -0500 Subject: [PATCH 06/14] ci: Add sysbio --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 65cabe5..92bfce8 
100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: needs: test_all_profiles strategy: matrix: - profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'sanger', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] + profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'sanger', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'utd_sysbio' 'uzh'] steps: - uses: actions/checkout@v1 - name: Install Nextflow From 14a2106c63c975eb8798525ed79593f714d00c7a Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 17 Jun 2021 08:06:24 -0500 Subject: [PATCH 07/14] fix(sysbio): Move rnaseq specific things to pipeline config --- conf/pipeline/rnaseq/utd_sysbio.config | 13 +++++++++++++ conf/utd_sysbio.config | 11 ----------- 2 files changed, 13 insertions(+), 11 deletions(-) create mode 100644 conf/pipeline/rnaseq/utd_sysbio.config diff --git a/conf/pipeline/rnaseq/utd_sysbio.config b/conf/pipeline/rnaseq/utd_sysbio.config new file mode 100644 index 0000000..1af5ed1 --- /dev/null +++ b/conf/pipeline/rnaseq/utd_sysbio.config @@ -0,0 +1,13 @@ +process { + + withName : "STAR_ALIGN" { + memory = 36.GB + } + + withLabel:process_high { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 60.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + +} diff --git a/conf/utd_sysbio.config 
b/conf/utd_sysbio.config index bd0eda8..5d02901 100644 --- a/conf/utd_sysbio.config +++ b/conf/utd_sysbio.config @@ -21,17 +21,6 @@ process { beforeScript = 'module load singularity/3.4.1' executor = 'slurm' queue = { task.memory >= 30.GB && task.cpu <= 16 ? 'normal': 'smallmem' } - - - withName:STAR_ALIGN { - memory = 36.GB - } - - withLabel:process_high { - cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } } params { From 87c8e346e07352070e3e2b644a2bb80256244a23 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 17 Jun 2021 08:13:29 -0500 Subject: [PATCH 08/14] fix(sysbio): Change filetype --- docs/{utd_sysbio.config => utd_sysbio.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/{utd_sysbio.config => utd_sysbio.md} (100%) diff --git a/docs/utd_sysbio.config b/docs/utd_sysbio.md similarity index 100% rename from docs/utd_sysbio.config rename to docs/utd_sysbio.md From 41ae99d34a81c364ebede9ea6b951fb52c551419 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 17 Jun 2021 08:17:18 -0500 Subject: [PATCH 09/14] feat(sysbio): Add cleanup --- conf/utd_sysbio.config | 3 +++ docs/utd_sysbio.md | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/conf/utd_sysbio.config b/conf/utd_sysbio.config index 5d02901..28460a8 100644 --- a/conf/utd_sysbio.config +++ b/conf/utd_sysbio.config @@ -23,6 +23,9 @@ process { queue = { task.memory >= 30.GB && task.cpu <= 16 ? 
'normal': 'smallmem' } } +// Perform work directory cleanup after a successful run +cleanup = true + params { // TODO Need to initialize this // igenomes_base = '/scratch/applied-genomics/references/iGenomes/references/' diff --git a/docs/utd_sysbio.md b/docs/utd_sysbio.md index ff00de9..3ae617a 100644 --- a/docs/utd_sysbio.md +++ b/docs/utd_sysbio.md @@ -12,7 +12,5 @@ module purge module load singularity ``` -All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory anyway. - >NB: You will need an account to use the HPC cluster on Sysbio in order to run the pipeline. If in doubt contact OIT. >NB: Nextflow will need to submit the jobs via SLURM to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact OIT. 
From e35c5fd1d87d1b004650db4c1c6194215e48c22f Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 17 Jun 2021 08:32:31 -0500 Subject: [PATCH 10/14] fix(sysbio): Add nf-core params --- conf/pipeline/rnaseq/utd_sysbio.config | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/conf/pipeline/rnaseq/utd_sysbio.config b/conf/pipeline/rnaseq/utd_sysbio.config index 1af5ed1..5e4c492 100644 --- a/conf/pipeline/rnaseq/utd_sysbio.config +++ b/conf/pipeline/rnaseq/utd_sysbio.config @@ -1,3 +1,10 @@ +params { + config_profile_description = 'University of Texas at Dallas HPC cluster profile provided by nf-core/configs' + config_profile_contact = 'Edmund Miller(@emiller88)' + config_profile_url = 'http://docs.oithpc.utdallas.edu/' + singularity_cache_dir = '/scratch/applied-genomics/singularity' +} + process { withName : "STAR_ALIGN" { From 21e1037e0c61a376edfe0e8f2778808b89fa6be2 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 17 Jun 2021 08:36:24 -0500 Subject: [PATCH 11/14] docs(sysbio): Add rnaseq specific docs --- conf/pipeline/rnaseq/utd_sysbio.config | 1 - docs/pipeline/rnaseq/utd_sysbio.md | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 docs/pipeline/rnaseq/utd_sysbio.md diff --git a/conf/pipeline/rnaseq/utd_sysbio.config b/conf/pipeline/rnaseq/utd_sysbio.config index 5e4c492..0c9dd7d 100644 --- a/conf/pipeline/rnaseq/utd_sysbio.config +++ b/conf/pipeline/rnaseq/utd_sysbio.config @@ -2,7 +2,6 @@ params { config_profile_description = 'University of Texas at Dallas HPC cluster profile provided by nf-core/configs' config_profile_contact = 'Edmund Miller(@emiller88)' config_profile_url = 'http://docs.oithpc.utdallas.edu/' - singularity_cache_dir = '/scratch/applied-genomics/singularity' } process { diff --git a/docs/pipeline/rnaseq/utd_sysbio.md b/docs/pipeline/rnaseq/utd_sysbio.md new file mode 100644 index 0000000..ff228d5 --- /dev/null +++ b/docs/pipeline/rnaseq/utd_sysbio.md @@ -0,0 +1,23 @@ +# 
nf-core/configs: UTD sysbio rnaseq specific configuration + +Extra specific configuration for rnaseq pipeline + +## Usage + +To use, run the pipeline with `-profile utd_sysbio`. + +This will download and launch the rnaseq specific [`utd_sysbio.config`](../../../conf/pipeline/rnaseq/utd_sysbio.config) which has been pre-configured with a setup suitable for the Sysbio cluster. + +Example: `nextflow run nf-core/rnaseq -profile utd_sysbio` + +## rnaseq specific configurations for UTD sysbio + +Specific configurations for UTD Sysbio have been made for rnaseq. + +### General profiles + + + +### Contextual profiles + + From f9e4ec4a8d5ac96c349dacaa38bd47db611ca1fa Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 17 Jun 2021 08:50:25 -0500 Subject: [PATCH 12/14] fix(sysbio): Add sysbio to rnaseq --- pipeline/rnaseq.config | 1 + 1 file changed, 1 insertion(+) diff --git a/pipeline/rnaseq.config b/pipeline/rnaseq.config index 1a27463..17a4ca0 100644 --- a/pipeline/rnaseq.config +++ b/pipeline/rnaseq.config @@ -10,4 +10,5 @@ profiles { eddie { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/eddie.config" } + utd_sysbio { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/utd_sysbio.config" } } From 5ad959a4361a13adeab3e65920a9d6fc930ad824 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 17 Jun 2021 09:08:27 -0500 Subject: [PATCH 13/14] fix(sysbio): Missing comma --- nfcore_custom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nfcore_custom.config b/nfcore_custom.config index dbb4638..75e8309 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -70,7 +70,7 @@ params { genotoul: ['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest: ['.genouest.org'], uppmax: ['.uppmax.uu.se'], - utd_ganymede: ['ganymede.utdallas.edu'] + utd_ganymede: ['ganymede.utdallas.edu'], utd_sysbio: ['sysbio.utdallas.edu'] ] } From cb213a499cfc3044e80f119121f7bcd2d010325a Mon Sep 17 00:00:00 2001 From: Edmund Miller 
Date: Thu, 17 Jun 2021 09:34:03 -0500 Subject: [PATCH 14/14] ci: Another missing comma --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 92bfce8..5dac8ac 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: needs: test_all_profiles strategy: matrix: - profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'sanger', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'utd_sysbio' 'uzh'] + profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'sanger', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'utd_sysbio', 'uzh'] steps: - uses: actions/checkout@v1 - name: Install Nextflow