From 537f52a6409200ac843e457722f0a74660d7d28c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 14 Feb 2021 17:50:20 +0100 Subject: [PATCH 01/10] Add MPI-EVA profile --- README.md | 30 ++--- conf/eva.config | 52 ++++++++ conf/pipeline/eager/eva.config | 212 +++++++++++++++++++++++++++++++++ docs/eva.md | 29 +++++ docs/pipeline/eager/eva.md | 34 ++++++ nfcore_custom.config | 1 + 6 files changed, 344 insertions(+), 14 deletions(-) create mode 100644 conf/eva.config create mode 100644 conf/pipeline/eager/eva.config create mode 100644 docs/eva.md create mode 100644 docs/pipeline/eager/eva.md diff --git a/README.md b/README.md index 45721e8..56c73b6 100644 --- a/README.md +++ b/README.md @@ -6,20 +6,20 @@ A repository for hosting Nextflow configuration files containing custom paramete ## Table of contents -* [Using an existing config](#using-an-existing-config) - * [Configuration and parameters](#configuration-and-parameters) - * [Offline usage](#offline-usage) -* [Adding a new config](#adding-a-new-config) - * [Checking user hostnames](#checking-user-hostnames) - * [Testing](#testing) - * [Documentation](#documentation) - * [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) -* [Adding a new pipeline-specific config](#adding-a-new-pipeline-specific-config) - * [Pipeline-specific institutional documentation](#pipeline-specific-institutional-documentation) - * [Pipeline-specific documentation](#pipeline-specific-documentation) - * [Enabling pipeline-specific configs within a pipeline](#enabling-pipeline-specific-configs-within-a-pipeline) - * [Create the pipeline-specific `nf-core/configs` files](#create-the-pipeline-specific-nf-coreconfigs-files) -* [Help](#help) +- [Using an existing config](#using-an-existing-config) + - [Configuration and parameters](#configuration-and-parameters) + - [Offline usage](#offline-usage) +- [Adding a new config](#adding-a-new-config) + - [Checking user hostnames](#checking-user-hostnames) + - [Testing](#testing) + - 
[Documentation](#documentation) + - [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) +- [Adding a new pipeline-specific config](#adding-a-new-pipeline-specific-config) + - [Pipeline-specific institutional documentation](#pipeline-specific-institutional-documentation) + - [Pipeline-specific documentation](#pipeline-specific-documentation) + - [Enabling pipeline-specific configs within a pipeline](#enabling-pipeline-specific-configs-within-a-pipeline) + - [Create the pipeline-specific `nf-core/configs` files](#create-the-pipeline-specific-nf-coreconfigs-files) +- [Help](#help) ## Using an existing config @@ -107,6 +107,7 @@ Currently documentation is available for the following systems: * [CZBIOHUB_AWS](docs/czbiohub.md) * [DENBI_QBIC](docs/denbi_qbic.md) * [EBC](docs/ebc.md) +* [EVA](docs/eva.md) * [GENOTOUL](docs/genotoul.md) * [GENOUEST](docs/genouest.md) * [GIS](docs/gis.md) @@ -174,6 +175,7 @@ Currently documentation is available for the following pipelines within specific * [UPPMAX](docs/pipeline/ampliseq/uppmax.md) * eager * [SHH](docs/pipeline/eager/shh.md) + * [EVA](docs/pipeline/eager/eva.md) * rnafusion * [MUNIN](docs/pipeline/rnafusion/munin.md) * sarek diff --git a/conf/eva.config b/conf/eva.config new file mode 100644 index 0000000..497085d --- /dev/null +++ b/conf/eva.config @@ -0,0 +1,52 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Generic MPI-EVA cluster(s) profile provided by nf-core/configs.' 
+ config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_url = 'https://eva.mpg.de' +} + +// Perform work directory cleanup after a successful run +cleanup = true + +singularity { + enabled = true + autoMounts = true +} + +process { + executor = 'sge' + penv = 'smp' + queue = 'all.q' +} + +executor { + queueSize = 8 +} + +profiles { + archgen { + params { + igenomes_base = "/projects1/public_data/igenomes/" + config_profile_description = 'MPI-EVA archgen profile, provided by nf-core/configs.' + max_memory = 256.GB + max_cpus = 32 + max_time = 720.h + //Illumina iGenomes reference file path + igenomes_base = "/projects1/public_data/igenomes/" + } + + process { + queue = 'archgen.q' + } + + singularity { + cacheDir = "/mnt/archgen/users/singularity_scratch" + + } + + } + // Profile to deactivate automatic cleanup of work directory after a successful run. Overwrites cleanup option. + debug { + cleanup = false + } +} diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config new file mode 100644 index 0000000..8850cf1 --- /dev/null +++ b/conf/pipeline/eager/eva.config @@ -0,0 +1,212 @@ +// Profile config names for nf-core/configs + +params { + // Specific nf-core/configs params + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_description = 'nf-core/eager EVA profile provided by nf-core/configs' +} + +// Specific nf-core/eager process configuration +process { + + maxRetries = 2 + + // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion + clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toMega().toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().toString().replaceAll(/[\sB]/,'')}M" } + + withLabel:'sc_tiny'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } + } + + withLabel:'sc_small'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_medium'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_small'{ + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_medium' { + cpus = { check_max( 4, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_large'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_huge'{ + cpus = { check_max( 32, 'cpus' ) } + memory = { check_max( 256.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } + } + + // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use + + withName: makeSeqDict { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: fastqc { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: adapter_removal { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: dedup { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: markduplicates { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: malt { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: maltextract { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: multivcfanalyzer { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: mtnucratio 
{ + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: vcf2genome { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: qualimap { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: damageprofiler { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: circularmapper { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: circulargenerator { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + + withName: preseq { + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + } + +} + +profiles { + + big_data { + + params { + // Specific nf-core/configs params + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_description = 'nf-core/eager big-data EVA profile provided by nf-core/configs' + } + + executor { + queueSize = 6 + } + + process { + + maxRetries = 2 + + withName:hostremoval_input_fastq { + cpus = { check_max( 1, 'cpus' ) } + 
memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = 1440.h + } + + withLabel:'sc_tiny'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 2.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_small'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_medium'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_small'{ + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_medium' { + cpus = { check_max( 4, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_large'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_huge'{ + cpus = { check_max( 32, 'cpus' ) } + memory = { check_max( 512.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + } + } + + pathogen_loose { + params { + config_profile_description = 'Pathogen (loose) MPI-EVA profile, provided by nf-core/configs.' + bwaalnn = 0.01 + bwaalnl = 16 + } + } + pathogen_strict { + params { + config_profile_description = 'Pathogen (strict) MPI-EVA SDAG profile, provided by nf-core/configs.' + bwaalnn = 0.1 + bwaalnl = 32 + } + } + human { + params { + config_profile_description = 'Human MPI-EVA SDAG profile, provided by nf-core/configs.' 
+ bwaalnn = 0.01 + bwaalnl = 16500 + } + } +} diff --git a/docs/eva.md b/docs/eva.md new file mode 100644 index 0000000..d69ef03 --- /dev/null +++ b/docs/eva.md @@ -0,0 +1,29 @@ +# nf-core/configs: EVA Configuration + +All nf-core pipelines have been successfully configured for use on the Department of Genetics and Archaeogenetic's clusters at the [Max Planck Institute for Evolutionary Anthropology (MPI-EVA)](http://eva.mpg.de). + +To use, run the pipeline with `-profile eva`. You can further with optimise submissions by specifying which cluster queue you are using e,g, `-profile eva,archgen`. This will download and launch the [`eva.config`](../conf/eva.config) which has been pre-configured with a setup suitable for the `all.q` queue. The number of parallel jobs that run is currently limited to 8. + +Using this profile, a docker image containing all of the required software will be downloaded, and converted to a `singularity` image before execution of the pipeline. The image will currently be centrally stored here: + +## Additional Profiles + +We currently also offer profiles for the different department's specific nodes. + +### archgen + +If you specify `-profile eva,archgen` you will be able to use the nodes available on the `archgen.q` queue. + +Note the following characteristics of this profile: + +- By default, job resources are assigned a maximum number of CPUs of 32, 256 GB maximum memory and 720.h maximum wall time. +- Using this profile will currently store singularity images in a cache under `/mnt/archgen/users/singularity_scratch/cache/`. All archgen users currently have read/write access to this directory, however this will likely change to a read-only directory in the future that will be managed by the IT team. +- Intermediate files will be _automatically_ cleaned up (see `debug` below if you don't want this to happen) on successful run completion. 
+ +>NB: You will need an account and VPN access to use the cluster at MPI-EVA in order to run the pipeline. If in doubt contact the IT team. +>NB: Nextflow will need to submit the jobs via SGE to the clusters and as such the commands above will have to be executed on one of the head nodes. If in doubt contact IT. + +### debug + +This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen` + diff --git a/docs/pipeline/eager/eva.md b/docs/pipeline/eager/eva.md new file mode 100644 index 0000000..4537182 --- /dev/null +++ b/docs/pipeline/eager/eva.md @@ -0,0 +1,34 @@ +# nf-core/configs: eva eager specific configuration + +Extra specific configuration for eager pipeline + +## Usage + +To use, run the pipeline with `-profile eva`. + +This will download and launch the eager specific [`eva.config`](../../../conf/pipeline/eager/eva.config) which has been pre-configured with a setup suitable for the MPI-EVA cluster. + +Example: `nextflow run nf-core/eager -profile eva` + +## eager specific configurations for eva + +Specific configurations for eva have been made for eager. + +### General profiles + +- The general MPI-EVA profile runs with default nf-core/eager parameters, but with modifications to account for issues SGE has with Java tools. + +#### big_data + +- This defines larger base computing resources for when working with very deeply sequenced or high-endogenous samples. + +### Contextual profiles + +#### Human Pop-Gen + +* `human`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16500, -n 0.01`) + +#### Pathogen + +* `pathogen_loose`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`) +* `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. 
bwa aln defaults as `-l 32, -n 0.1`) \ No newline at end of file diff --git a/nfcore_custom.config b/nfcore_custom.config index 429e3c1..9e94f83 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -23,6 +23,7 @@ profiles { crick { includeConfig "${params.custom_config_base}/conf/crick.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } + eva { includeConfig "${params.custom_config_base}/conf/eva.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" } imperial { includeConfig "${params.custom_config_base}/conf/imperial.config" } imperial_mb { includeConfig "${params.custom_config_base}/conf/imperial_mb.config" } From a78aac327dde8ca0cdb84a011ccfc96c3e3a509e Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 14 Feb 2021 17:58:50 +0100 Subject: [PATCH 02/10] Linting fixes --- docs/eva.md | 3 +-- docs/pipeline/eager/eva.md | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/eva.md b/docs/eva.md index d69ef03..42575b2 100644 --- a/docs/eva.md +++ b/docs/eva.md @@ -2,7 +2,7 @@ All nf-core pipelines have been successfully configured for use on the Department of Genetics and Archaeogenetic's clusters at the [Max Planck Institute for Evolutionary Anthropology (MPI-EVA)](http://eva.mpg.de). -To use, run the pipeline with `-profile eva`. You can further with optimise submissions by specifying which cluster queue you are using e,g, `-profile eva,archgen`. This will download and launch the [`eva.config`](../conf/eva.config) which has been pre-configured with a setup suitable for the `all.q` queue. The number of parallel jobs that run is currently limited to 8. +To use, run the pipeline with `-profile eva`. You can further with optimise submissions by specifying which cluster queue you are using e,g, `-profile eva,archgen`. 
This will download and launch the [`eva.config`](../conf/eva.config) which has been pre-configured with a setup suitable for the `all.q` queue. The number of parallel jobs that run is currently limited to 8. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a `singularity` image before execution of the pipeline. The image will currently be centrally stored here: @@ -26,4 +26,3 @@ Note the following characteristics of this profile: ### debug This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen` - diff --git a/docs/pipeline/eager/eva.md b/docs/pipeline/eager/eva.md index 4537182..a8dc563 100644 --- a/docs/pipeline/eager/eva.md +++ b/docs/pipeline/eager/eva.md @@ -26,9 +26,9 @@ Specific configurations for eva has been made for eager. #### Human Pop-Gen -* `human`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16500, -n 0.01`) +- `human`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16500, -n 0.01`) #### Pathogen -* `pathogen_loose`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`) -* `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 32, -n 0.1`) \ No newline at end of file +- `pathogen_loose`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`) +- `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. 
bwa aln defaults as `-l 32, -n 0.1`) From 5bfb9510c7e38ba4e43885abbd80b198a1bdfa83 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 14 Feb 2021 17:59:55 +0100 Subject: [PATCH 03/10] Add to testing --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3e5c930..c500dfb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: needs: test_all_profiles strategy: matrix: - profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'imperial', 'imperial_mb', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] + profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'imperial', 'imperial_mb', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] steps: - uses: actions/checkout@v1 - name: Install Nextflow From 4339e5872c211fe2186b8e6b0e012ace45749bb3 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sun, 14 Feb 2021 18:05:06 +0100 Subject: [PATCH 04/10] Update eager.config --- pipeline/eager.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/eager.config b/pipeline/eager.config index 9242ecb..2827496 100644 --- a/pipeline/eager.config +++ b/pipeline/eager.config @@ -11,5 +11,5 @@ profiles { shh { includeConfig "${params.custom_config_base}/conf/pipeline/eager/shh.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/eager/mpcdf.config" } - + eva { includeConfig "${params.custom_config_base}/conf/pipeline/eager/eva.config" } } From 0bac3691e3656767aa10134c76ad4536e0b70814 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sun, 14 Feb 2021 18:07:40 +0100 Subject: [PATCH 05/10] Create mpcdf.config --- docs/pipeline/eager/mpcdf.config | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 docs/pipeline/eager/mpcdf.config diff --git a/docs/pipeline/eager/mpcdf.config b/docs/pipeline/eager/mpcdf.config new file mode 100644 index 0000000..d170f4c --- /dev/null +++ b/docs/pipeline/eager/mpcdf.config @@ -0,0 +1,11 @@ +# nf-core/configs: mpcdf eager specific configuration + +Extra specific configuration for eager pipeline for the `cobra` cluster of the MPCDF + +## Usage + +To use, run the pipeline with `-profile mpcdf,cobra`. + +This will download and launch the eager specific [`mpcdf.config`](../../../conf/pipeline/eager/mpcdf.config) which has been pre-configured with a setup suitable for the mpcdf cluster. + +Currently this only applies to the `cobra` cluster, where maximum resources are adjusted accordingly. 
From d69af0f287700e82680a958295d41c3aa231b086 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 6 Apr 2021 16:42:22 +0200 Subject: [PATCH 06/10] Merging --- conf/pipeline/eager/eva.config | 14 +++++++------- conf/pipeline/eager/shh.config | 6 ++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config index 8850cf1..46fddae 100644 --- a/conf/pipeline/eager/eva.config +++ b/conf/pipeline/eager/eva.config @@ -63,19 +63,19 @@ process { } withName: fastqc { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } } withName: adapter_removal { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(12000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(12000).toString().replaceAll(/[\sB]/,'')}M" } } withName: dedup { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } } withName: markduplicates { - clusterOptions = { "-S /bin/bash -l 
h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(8000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(8000).toString().replaceAll(/[\sB]/,'')}M" } } withName: malt { @@ -99,11 +99,11 @@ process { } withName: qualimap { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M" } } withName: damageprofiler { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M" } } withName: circularmapper { @@ -115,7 +115,7 @@ process { } withName: preseq { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } } } diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 44d1c3c..5046df0 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -20,6 +20,12 @@ process { queue = { task.memory > 756.GB ? 
'supercruncher' : 'long' } } + withName: circulargenerator { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } From be1d35840d65ac0dfeccc13c511d99d2236f0f20 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 6 Apr 2021 17:48:16 +0200 Subject: [PATCH 07/10] Add EVA and EAGER@EVA --- .github/workflows/main.yml | 2 +- conf/eva.config | 2 +- conf/pipeline/eager/eva.config | 37 ++++++++++++++++++---------------- docs/eva.md | 2 +- docs/pipeline/eager/eva.md | 2 +- 5 files changed, 24 insertions(+), 21 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c500dfb..96776a6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: needs: test_all_profiles strategy: matrix: - profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'imperial', 'imperial_mb', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] + profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] steps: - uses: actions/checkout@v1 - name: Install Nextflow diff --git a/conf/eva.config b/conf/eva.config index 497085d..c0a1d4f 100644 --- a/conf/eva.config +++ b/conf/eva.config @@ -49,4 +49,4 @@ profiles { debug { cleanup = false } -} +} \ No newline at end 
of file diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config index 46fddae..2e07d57 100644 --- a/conf/pipeline/eager/eva.config +++ b/conf/pipeline/eager/eva.config @@ -9,10 +9,12 @@ params { // Specific nf-core/eager process configuration process { + beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -XX:+PrintCommandLineFlags"' + maxRetries = 2 // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion - clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toMega().toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toGiga()}G,virtual_free=${task.memory.toGiga()}G" } withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } @@ -59,63 +61,64 @@ process { // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use withName: makeSeqDict { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -v JAVA_OPTS='-XX:ParallelGCThreads=1' -l h_vmem=${(task.memory.toGiga() + 3)}G,virtual_free=${(task.memory.toGiga() + 3)}G" } } withName: fastqc { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: adapter_removal { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(12000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(12000).toString().replaceAll(/[\sB]/,'')}M" } + 
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: dedup { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: markduplicates { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(8000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(8000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } } withName: malt { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: maltextract { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: multivcfanalyzer { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: mtnucratio { - clusterOptions = { "-S /bin/bash -l 
h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: vcf2genome { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: qualimap { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } } withName: damageprofiler { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(5000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } } withName: circularmapper { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: circulargenerator { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(1000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 
2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } } withName: preseq { - clusterOptions = { "-S /bin/bash -l h_vmem=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M,virtual_free=${task.memory.toMega().plus(4000).toString().replaceAll(/[\sB]/,'')}M" } + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' } } } @@ -209,4 +212,4 @@ profiles { bwaalnl = 16500 } } -} +} \ No newline at end of file diff --git a/docs/eva.md b/docs/eva.md index 42575b2..9b83222 100644 --- a/docs/eva.md +++ b/docs/eva.md @@ -25,4 +25,4 @@ Note the following characteristics of this profile: ### debug -This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen` +This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen` \ No newline at end of file diff --git a/docs/pipeline/eager/eva.md b/docs/pipeline/eager/eva.md index a8dc563..71f4a2e 100644 --- a/docs/pipeline/eager/eva.md +++ b/docs/pipeline/eager/eva.md @@ -31,4 +31,4 @@ Specific configurations for eva has been made for eager. #### Pathogen - `pathogen_loose`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`) -- `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 32, -n 0.1`) +- `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 32, -n 0.1`) \ No newline at end of file From 504c10d6ff5d2965a55f282c81ae66325ae0d380 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 6 Apr 2021 17:50:25 +0200 Subject: [PATCH 08/10] Update README.md --- README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 56c73b6..687a1ae 100644 --- a/README.md +++ b/README.md @@ -6,20 +6,20 @@ A repository for hosting Nextflow configuration files containing custom paramete ## Table of contents -- [Using an existing config](#using-an-existing-config) - - [Configuration and parameters](#configuration-and-parameters) - - [Offline usage](#offline-usage) -- [Adding a new config](#adding-a-new-config) - - [Checking user hostnames](#checking-user-hostnames) - - [Testing](#testing) - - [Documentation](#documentation) - - [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) -- [Adding a new pipeline-specific config](#adding-a-new-pipeline-specific-config) - - [Pipeline-specific institutional documentation](#pipeline-specific-institutional-documentation) - - [Pipeline-specific documentation](#pipeline-specific-documentation) - - [Enabling pipeline-specific configs within a pipeline](#enabling-pipeline-specific-configs-within-a-pipeline) - - [Create the pipeline-specific `nf-core/configs` files](#create-the-pipeline-specific-nf-coreconfigs-files) -- [Help](#help) +* [Using an existing config](#using-an-existing-config) + * [Configuration and parameters](#configuration-and-parameters) + * [Offline usage](#offline-usage) +* [Adding a new config](#adding-a-new-config) + * [Checking user hostnames](#checking-user-hostnames) + * [Testing](#testing) + * [Documentation](#documentation) + * [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) +* [Adding a new pipeline-specific config](#adding-a-new-pipeline-specific-config) + * [Pipeline-specific institutional documentation](#pipeline-specific-institutional-documentation) + * [Pipeline-specific documentation](#pipeline-specific-documentation) + * [Enabling pipeline-specific configs within a 
pipeline](#enabling-pipeline-specific-configs-within-a-pipeline) + * [Create the pipeline-specific `nf-core/configs` files](#create-the-pipeline-specific-nf-coreconfigs-files) +* [Help](#help) ## Using an existing config From 60c66d93aca43d7ae3473ff182c9a83e087695cc Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 6 Apr 2021 17:53:27 +0200 Subject: [PATCH 09/10] Markdown linting --- docs/eva.md | 2 +- docs/pipeline/eager/eva.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/eva.md b/docs/eva.md index 9b83222..24a964c 100644 --- a/docs/eva.md +++ b/docs/eva.md @@ -25,4 +25,4 @@ Note the following characteristics of this profile: ### debug -This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen` \ No newline at end of file +This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,debug`. diff --git a/docs/pipeline/eager/eva.md b/docs/pipeline/eager/eva.md index 71f4a2e..a8dc563 100644 --- a/docs/pipeline/eager/eva.md +++ b/docs/pipeline/eager/eva.md @@ -31,4 +31,4 @@ Specific configurations for eva has been made for eager. #### Pathogen - `pathogen_loose`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`) -- `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 32, -n 0.1`) \ No newline at end of file +- `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 32 -n 0.1`) From ce465cca57113aa4cf568f86a35211b0f2d1e4d0 Mon Sep 17 00:00:00 2001 From: "James A.
Fellows Yates" Date: Wed, 7 Apr 2021 19:27:24 +0200 Subject: [PATCH 10/10] Apply suggestions from code review Co-authored-by: Alexander Peltzer --- conf/eva.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/eva.config b/conf/eva.config index c0a1d4f..ad71e12 100644 --- a/conf/eva.config +++ b/conf/eva.config @@ -32,7 +32,6 @@ profiles { max_cpus = 32 max_time = 720.h //Illumina iGenomes reference file path - igenomes_base = "/projects1/public_data/igenomes/" } process { @@ -49,4 +48,4 @@ profiles { debug { cleanup = false } -} \ No newline at end of file +}