1
0
Fork 0
mirror of https://github.com/MillironX/nf-configs.git synced 2024-11-22 08:29:54 +00:00

Merge pull request #224 from jfy133/eva

Add EVA profile for MPI-EVA (clean)
This commit is contained in:
James A. Fellows Yates 2021-04-07 19:56:00 +02:00 committed by GitHub
commit a2a47fdecc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 350 additions and 2 deletions

View file

@ -16,7 +16,7 @@ jobs:
needs: test_all_profiles needs: test_all_profiles
strategy: strategy:
matrix: matrix:
profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh']
steps: steps:
- uses: actions/checkout@v1 - uses: actions/checkout@v1
- name: Install Nextflow - name: Install Nextflow

View file

@ -108,6 +108,7 @@ Currently documentation is available for the following systems:
* [CZBIOHUB_AWS](docs/czbiohub.md) * [CZBIOHUB_AWS](docs/czbiohub.md)
* [DENBI_QBIC](docs/denbi_qbic.md) * [DENBI_QBIC](docs/denbi_qbic.md)
* [EBC](docs/ebc.md) * [EBC](docs/ebc.md)
* [EVA](docs/eva.md)
* [GENOTOUL](docs/genotoul.md) * [GENOTOUL](docs/genotoul.md)
* [GENOUEST](docs/genouest.md) * [GENOUEST](docs/genouest.md)
* [GIS](docs/gis.md) * [GIS](docs/gis.md)
@ -176,6 +177,7 @@ Currently documentation is available for the following pipelines within specific
* [UPPMAX](docs/pipeline/ampliseq/uppmax.md) * [UPPMAX](docs/pipeline/ampliseq/uppmax.md)
* eager * eager
* [SHH](docs/pipeline/eager/shh.md) * [SHH](docs/pipeline/eager/shh.md)
* [EVA](docs/pipeline/eager/eva.md)
* rnafusion * rnafusion
* [MUNIN](docs/pipeline/rnafusion/munin.md) * [MUNIN](docs/pipeline/rnafusion/munin.md)
* sarek * sarek

51
conf/eva.config Normal file
View file

@ -0,0 +1,51 @@
//Profile config names for nf-core/configs
params {
config_profile_description = 'Generic MPI-EVA cluster(s) profile provided by nf-core/configs.'
config_profile_contact = 'James Fellows Yates (@jfy133)'
config_profile_url = 'https://eva.mpg.de'
}
// Preform work directory cleanup after a successful run
cleanup = true
singularity {
enabled = true
autoMounts = true
}
process {
executor = 'sge'
penv = 'smp'
queue = 'all.q'
}
executor {
queueSize = 8
}
profiles {
archgen {
params {
igenomes_base = "/projects1/public_data/igenomes/"
config_profile_description = 'MPI-EVA archgen profile, provided by nf-core/configs.'
max_memory = 256.GB
max_cpus = 32
max_time = 720.h
//Illumina iGenomes reference file path
}
process {
queue = 'archgen.q'
}
singularity {
cacheDir = "/mnt/archgen/users/singularity_scratch"
}
}
// Profile to deactivate automatic cleanup of work directory after a successful run. Overwrites cleanup option.
debug {
cleanup = false
}
}

View file

@ -0,0 +1,215 @@
// Profile config names for nf-core/configs
params {
// Specific nf-core/configs params
config_profile_contact = 'James Fellows Yates (@jfy133)'
config_profile_description = 'nf-core/eager EVA profile provided by nf-core/configs'
}
// Specific nf-core/eager process configuration
process {
beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -XX:+PrintCommandLineFlags"'
maxRetries = 2
// Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion
clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toGiga()}G,virtual_free=${task.memory.toGiga()}G" }
withLabel:'sc_tiny'{
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 1.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'sc_small'{
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'sc_medium'{
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_small'{
cpus = { check_max( 2, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_medium' {
cpus = { check_max( 4, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_large'{
cpus = { check_max( 8, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_huge'{
cpus = { check_max( 32, 'cpus' ) }
memory = { check_max( 256.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
// Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use
withName: makeSeqDict {
clusterOptions = { "-S /bin/bash -v JAVA_OPTS='-XX:ParallelGCThreads=1' -l h_vmem=${(task.memory.toGiga() + 3)}G,virtual_free=${(task.memory.toGiga() + 3)}G" }
}
withName: fastqc {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: adapter_removal {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: dedup {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: markduplicates {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
}
withName: malt {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: maltextract {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: multivcfanalyzer {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: mtnucratio {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: vcf2genome {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: qualimap {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
}
withName: damageprofiler {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
}
withName: circularmapper {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: circulargenerator {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
}
withName: preseq {
clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' }
}
}
profiles {
big_data {
params {
// Specific nf-core/configs params
config_profile_contact = 'James Fellows Yates (@jfy133)'
config_profile_description = 'nf-core/eager big-data EVA profile provided by nf-core/configs'
}
executor {
queueSize = 6
}
process {
maxRetries = 2
withName:hostremoval_input_fastq {
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
time = 1440.h
}
withLabel:'sc_tiny'{
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 2.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'sc_small'{
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'sc_medium'{
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_small'{
cpus = { check_max( 2, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_medium' {
cpus = { check_max( 4, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_large'{
cpus = { check_max( 8, 'cpus' ) }
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
withLabel:'mc_huge'{
cpus = { check_max( 32, 'cpus' ) }
memory = { check_max( 512.GB * task.attempt, 'memory' ) }
time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
}
}
}
pathogen_loose {
params {
config_profile_description = 'Pathogen (loose) MPI-EVA profile, provided by nf-core/configs.'
bwaalnn = 0.01
bwaalnl = 16
}
}
pathogen_strict {
params {
config_profile_description = 'Pathogen (strict) MPI-EVA SDAG profile, provided by nf-core/configs.'
bwaalnn = 0.1
bwaalnl = 32
}
}
human {
params {
config_profile_description = 'Human MPI-EVA SDAG profile, provided by nf-core/configs.'
bwaalnn = 0.01
bwaalnl = 16500
}
}
}

View file

@ -20,6 +20,12 @@ process {
queue = { task.memory > 756.GB ? 'supercruncher' : 'long' } queue = { task.memory > 756.GB ? 'supercruncher' : 'long' }
} }
withName: circulargenerator {
cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:'sc_tiny'{ withLabel:'sc_tiny'{
cpus = { check_max( 1, 'cpus' ) } cpus = { check_max( 1, 'cpus' ) }
memory = { check_max( 1.GB * task.attempt, 'memory' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) }

28
docs/eva.md Normal file
View file

@ -0,0 +1,28 @@
# nf-core/configs: EVA Configuration
All nf-core pipelines have been successfully configured for use on the Department of Genetics and Archaeogenetic's clusters at the [Max Planck Institute for Evolutionary Anthropology (MPI-EVA)](http://eva.mpg.de).
To use, run the pipeline with `-profile eva`. You can further with optimise submissions by specifying which cluster queue you are using e,g, `-profile eva,archgen`. This will download and launch the [`eva.config`](../conf/eva.config) which has been pre-configured with a setup suitable for the `all.q` queue. The number of parallel jobs that run is currently limited to 8.
Using this profile, a docker image containing all of the required software will be downloaded, and converted to a `singularity` image before execution of the pipeline. The image will currently be centrally stored here:
## Additional Profiles
We currently also offer profiles for the different department's specific nodes.
### archgen
If you specify `-profile eva,archgen` you will be able to use the nodes available on the `archgen.q` queue.
Note the following characteristics of this profile:
- By default, job resources are assigned a maximum number of CPUs of 32, 256 GB maximum memory and 720.h maximum wall time.
- Using this profile will currently store singularity images in a cache under `/mnt/archgen/users/singularity_scratch/cache/`. All archgen users currently have read/write access to this directory, however this will likely change to a read-only directory in the future that will be managed by the IT team.
- Intermediate files will be _automatically_ cleaned up (see `debug` below if you don't want this to happen) on successful run completion.
>NB: You will need an account and VPN access to use the cluster at MPI-EVA in order to run the pipeline. If in doubt contact the IT team.
>NB: Nextflow will need to submit the jobs via SGE to the clusters and as such the commands above will have to be executed on one of the head nodes. If in doubt contact IT.
### debug
This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen`.

View file

@ -0,0 +1,34 @@
# nf-core/configs: eva eager specific configuration
Extra specific configuration for eager pipeline
## Usage
To use, run the pipeline with `-profile eva`.
This will download and launch the eager specific [`eva.config`](../../../conf/pipeline/eager/eva.config) which has been pre-configured with a setup suitable for the MPI-EVA cluster.
Example: `nextflow run nf-core/eager -profile eva`
## eager specific configurations for eva
Specific configurations for eva has been made for eager.
### General profiles
- The general MPI-EVA profile runs with default nf-core/eager parameters, but with modifications to account for issues SGE have with Java tools.
#### big_data
- This defines larger base computing resources for when working with very deep sequenced or high-endogenous samples.
### Contextual profiles
#### Human Pop-Gen
- `human`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16500, -n 0.01`)
#### Pathogen
- `pathogen_loose`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`)
- `pathogen_strict`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 32, -n 0.1`)

View file

@ -0,0 +1,11 @@
# nf-core/configs: mpcdf eager specific configuration
Extra specific configuration for eager pipeline for the `cobra` cluster of the MPCDF
## Usage
To use, run the pipeline with `-profile mpcdf,cobra`.
This will download and launch the eager specific [`mpcdf.config`](../../../conf/pipeline/eager/mpcdf.config) which has been pre-configured with a setup suitable for the mpcdf cluster.
Currently this only applies to the `cobra` cluster, where maximum resources are adjusted accordingly.

View file

@ -25,6 +25,7 @@ profiles {
czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" }
eddie { includeConfig "${params.custom_config_base}/conf/eddie.config" } eddie { includeConfig "${params.custom_config_base}/conf/eddie.config" }
eva { includeConfig "${params.custom_config_base}/conf/eva.config" }
icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" }
ifb_core { includeConfig "${params.custom_config_base}/conf/ifb_core.config" } ifb_core { includeConfig "${params.custom_config_base}/conf/ifb_core.config" }
imperial { includeConfig "${params.custom_config_base}/conf/imperial.config" } imperial { includeConfig "${params.custom_config_base}/conf/imperial.config" }

View file

@ -11,5 +11,5 @@
profiles { profiles {
shh { includeConfig "${params.custom_config_base}/conf/pipeline/eager/shh.config" } shh { includeConfig "${params.custom_config_base}/conf/pipeline/eager/shh.config" }
mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/eager/mpcdf.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/eager/mpcdf.config" }
eva { includeConfig "${params.custom_config_base}/conf/pipeline/eager/eva.config" }
} }