Merge pull request #224 from jfy133/eva

Add EVA profile for MPI-EVA (clean)
2024-11-22 00:26:03 +00:00 · 2021-04-07 19:56:00 +02:00 · 2021-04-07 19:56:00 +02:00 · a2a47fdecc
commit a2a47fdecc
parent 91f50aad4e 161c3a33d4
10 changed files with 350 additions and 2 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -16,7 +16,7 @@ jobs:
    needs: test_all_profiles
    strategy:
        matrix:
-          profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh']
+          profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh']
    steps:
      - uses: actions/checkout@v1
      - name: Install Nextflow
--- a/README.md
+++ b/README.md
@ -108,6 +108,7 @@ Currently documentation is available for the following systems:
 * [CZBIOHUB_AWS](docs/czbiohub.md)
 * [DENBI_QBIC](docs/denbi_qbic.md)
 * [EBC](docs/ebc.md)
+* [EVA](docs/eva.md)
 * [GENOTOUL](docs/genotoul.md)
 * [GENOUEST](docs/genouest.md)
 * [GIS](docs/gis.md)
@ -176,6 +177,7 @@ Currently documentation is available for the following pipelines within specific
  * [UPPMAX](docs/pipeline/ampliseq/uppmax.md)
 * eager
  * [SHH](docs/pipeline/eager/shh.md)
+  * [EVA](docs/pipeline/eager/eva.md)
 * rnafusion
  * [MUNIN](docs/pipeline/rnafusion/munin.md)
 * sarek
--- a/conf/eva.config
+++ b/conf/eva.config
@ -0,0 +1,51 @@
+//Profile config names for nf-core/configs
+params {
+  // Descriptive metadata for this profile: what it is, who maintains it, where to read more
+  config_profile_description = 'Generic MPI-EVA cluster(s) profile provided by nf-core/configs.'
+  config_profile_contact     = 'James Fellows Yates (@jfy133)'
+  config_profile_url         = 'https://eva.mpg.de'
+}
+
+// Perform work directory cleanup after a successful run
+cleanup = true
+
+singularity {
+    // Run every process inside a Singularity container
+    enabled    = true
+    // Let Nextflow set up the container bind mounts automatically
+    autoMounts = true
+}
+
+process {
+    // Submit every task through SGE, using the 'smp' parallel environment on the 'all.q' queue
+    executor = 'sge'
+    queue    = 'all.q'
+    penv     = 'smp'
+}
+
+executor {
+    queueSize = 8  // number of jobs handled in parallel is limited to 8
+}
+
+profiles {
+    // Settings for jobs submitted to the archgen.q queue
+    archgen {
+        params {
+            // Illumina iGenomes reference file path
+            igenomes_base = "/projects1/public_data/igenomes/"
+            config_profile_description = 'MPI-EVA archgen profile, provided by nf-core/configs.'
+            // Upper bounds on the resources a single job may request
+            max_cpus = 32
+            max_memory = 256.GB
+            max_time = 720.h
+        }
+
+        process {
+            queue = 'archgen.q'
+        }
+
+        singularity {
+            cacheDir = "/mnt/archgen/users/singularity_scratch"
+        }
+    }
+
+    // Profile to deactivate automatic cleanup of work directory after a successful run. Overwrites cleanup option.
+    debug {
+        cleanup = false
+    }
+}
--- a/conf/pipeline/eager/eva.config
+++ b/conf/pipeline/eager/eva.config
@ -0,0 +1,215 @@
+// Profile config names for nf-core/configs
+
+params {
+  // Specific nf-core/configs params
+  config_profile_description = 'nf-core/eager EVA profile provided by nf-core/configs'
+  config_profile_contact     = 'James Fellows Yates (@jfy133)'
+}
+
+// Specific nf-core/eager process configuration
+process {
+
+  beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -XX:+PrintCommandLineFlags"'  // restrict the JVM to one parallel GC thread and print the JVM flags in use
+
+  maxRetries = 2  // allow at most 2 re-submissions of a failed task (attempts 1-3)
+
+  // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toGiga conversion
+  clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toGiga()}G,virtual_free=${task.memory.toGiga()}G" }  // request the task's memory (in whole GB) from SGE as both h_vmem and virtual_free
+
+  // Resource tiers selected by process label: sc_* labels request one CPU, mc_* labels
+  // request several. Memory grows linearly with task.attempt; the wall-time request
+  // steps 2 h -> 48 h -> 1440 h on attempts 1, 2 and 3.
+  withLabel: 'sc_tiny' {
+    cpus   = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 1.GB * task.attempt, 'memory' ) }
+    time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+  }
+
+  withLabel: 'sc_small' {
+    cpus   = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+    time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+  }
+
+  withLabel: 'sc_medium' {
+    cpus   = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+    time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+  }
+
+  withLabel: 'mc_small' {
+    cpus   = { check_max( 2, 'cpus' ) }
+    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+    time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+  }
+
+  withLabel: 'mc_medium' {
+    cpus   = { check_max( 4, 'cpus' ) }
+    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+    time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+  }
+
+  withLabel: 'mc_large' {
+    cpus   = { check_max( 8, 'cpus' ) }
+    memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+    time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+  }
+
+  withLabel: 'mc_huge' {
+    cpus   = { check_max( 32, 'cpus' ) }
+    memory = { check_max( 256.GB * task.attempt, 'memory' ) }
+    time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+  }
+  
+  // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use.
+  // Each override below replaces the process-wide clusterOptions and pads the SGE memory
+  // request beyond task.memory (doubled, or increased by a fixed 3 GB / 6 GB).
+
+  withName: makeSeqDict {
+    clusterOptions = { "-S /bin/bash -v JAVA_OPTS='-XX:ParallelGCThreads=1' -l h_vmem=${(task.memory.toGiga() + 3)}G,virtual_free=${(task.memory.toGiga() + 3)}G" }
+  }
+
+  withName: fastqc {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: adapter_removal {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: dedup {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: markduplicates {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
+  }
+
+  withName: malt {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: maltextract {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: multivcfanalyzer {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: mtnucratio {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: vcf2genome {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: qualimap {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
+  }
+
+  withName: damageprofiler {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
+  }
+
+  withName: circularmapper {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: circulargenerator {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+  }
+
+  withName: preseq {
+    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
+    // Retry on the listed exit codes; any other preseq failure is ignored
+    errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' }
+  }
+
+}
+
+profiles {
+ 
+  big_data {
+
+    params {
+      // Specific nf-core/configs params
+      config_profile_contact = 'James Fellows Yates (@jfy133)'
+      config_profile_description = 'nf-core/eager big-data EVA profile provided by nf-core/configs'
+    }
+
+    // Fewer jobs in parallel than the base profile, as each job requests more memory
+    executor {
+      queueSize = 6
+    }
+
+    process {
+
+      maxRetries = 2
+
+      withName: hostremoval_input_fastq {
+        cpus   = { check_max( 1, 'cpus' ) }
+        memory = { check_max( 32.GB * task.attempt, 'memory' ) }
+        time   = 1440.h
+      }
+
+      // Same label tiers as the default eager EVA settings, with larger base memory requests
+      withLabel: 'sc_tiny' {
+        cpus   = { check_max( 1, 'cpus' ) }
+        memory = { check_max( 2.GB * task.attempt, 'memory' ) }
+        time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+      }
+
+      withLabel: 'sc_small' {
+        cpus   = { check_max( 1, 'cpus' ) }
+        memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+        time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+      }
+
+      withLabel: 'sc_medium' {
+        cpus   = { check_max( 1, 'cpus' ) }
+        memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+        time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+      }
+
+      withLabel: 'mc_small' {
+        cpus   = { check_max( 2, 'cpus' ) }
+        memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+        time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+      }
+
+      withLabel: 'mc_medium' {
+        cpus   = { check_max( 4, 'cpus' ) }
+        memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+        time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+      }
+
+      withLabel: 'mc_large' {
+        cpus   = { check_max( 8, 'cpus' ) }
+        memory = { check_max( 32.GB * task.attempt, 'memory' ) }
+        time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+      }
+
+      withLabel: 'mc_huge' {
+        cpus   = { check_max( 32, 'cpus' ) }
+        memory = { check_max( 512.GB * task.attempt, 'memory' ) }
+        time   = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
+      }
+    }
+  }
+  
+  // Loose pathogen mapping: bwa aln -n 0.01, -l 16
+  pathogen_loose {
+    params {
+      config_profile_description = 'Pathogen (loose) MPI-EVA profile, provided by nf-core/configs.'
+      bwaalnl = 16
+      bwaalnn = 0.01
+    }
+  }
+  // Strict pathogen mapping: bwa aln -n 0.1, -l 32
+  pathogen_strict {
+    params {
+      // Description names only the MPI-EVA cluster, matching the pathogen_loose profile
+      config_profile_description = 'Pathogen (strict) MPI-EVA profile, provided by nf-core/configs.'
+      bwaalnn = 0.1
+      bwaalnl = 32
+    }
+  }
+  // Human aDNA mapping: bwa aln -n 0.01, -l 16500
+  human {
+    params {
+      // Description names only the MPI-EVA cluster, matching the pathogen_loose profile
+      config_profile_description = 'Human MPI-EVA profile, provided by nf-core/configs.'
+      bwaalnn = 0.01
+      bwaalnl = 16500
+    }
+  }
+}
--- a/conf/pipeline/eager/shh.config
+++ b/conf/pipeline/eager/shh.config
@ -20,6 +20,12 @@ process {
    queue = { task.memory > 756.GB ? 'supercruncher' : 'long' }
  }
  
+  withName: circulargenerator {
+      cpus = { check_max( 1, 'cpus' ) }  // single CPU
+      memory = { check_max( 4.GB * task.attempt, 'memory' ) }  // 4 GB multiplied by the attempt number
+      time = { check_max( 4.h * task.attempt, 'time' ) }  // 4 h multiplied by the attempt number
+  }
+  
  withLabel:'sc_tiny'{
    cpus = { check_max( 1, 'cpus' ) }
    memory = { check_max( 1.GB * task.attempt, 'memory' ) }
--- a/docs/eva.md
+++ b/docs/eva.md
@ -0,0 +1,28 @@
+# nf-core/configs: EVA Configuration
+
+All nf-core pipelines have been successfully configured for use on the clusters of the Departments of Genetics and Archaeogenetics at the [Max Planck Institute for Evolutionary Anthropology (MPI-EVA)](http://eva.mpg.de).
+
+To use, run the pipeline with `-profile eva`. You can further optimise submissions by specifying which cluster queue you are using, e.g. `-profile eva,archgen`. This will download and launch the [`eva.config`](../conf/eva.config) which has been pre-configured with a setup suitable for the `all.q` queue. The number of parallel jobs that run is currently limited to 8.
+
+Using this profile, a docker image containing all of the required software will be downloaded, and converted to a `singularity` image before execution of the pipeline. Where the image is stored depends on the additional profile you use (see below).
+
+## Additional Profiles
+
+We currently also offer profiles for the specific nodes of the different departments.
+
+### archgen
+
+If you specify `-profile eva,archgen` you will be able to use the nodes available on the `archgen.q` queue.
+
+Note the following characteristics of this profile:
+
+- By default, job resources are assigned a maximum number of CPUs of 32, 256 GB maximum memory and 720.h maximum wall time.
+- Using this profile will currently store singularity images in a cache under `/mnt/archgen/users/singularity_scratch`. All archgen users currently have read/write access to this directory, however this will likely change to a read-only directory in the future that will be managed by the IT team.
+- Intermediate files will be _automatically_ cleaned up (see `debug` below if you don't want this to happen) on successful run completion.
+
+>NB: You will need an account and VPN access to use the cluster at MPI-EVA in order to run the pipeline. If in doubt contact the IT team.
+>NB: Nextflow will need to submit the jobs via SGE to the clusters and as such the commands above will have to be executed on one of the head nodes. If in doubt contact IT.
+
+### debug
+
+This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen,debug`.
--- a/docs/pipeline/eager/eva.md
+++ b/docs/pipeline/eager/eva.md
@ -0,0 +1,34 @@
+# nf-core/configs: eva eager specific configuration
+
+Extra specific configuration for eager pipeline
+
+## Usage
+
+To use, run the pipeline with `-profile eva`.
+
+This will download and launch the eager specific [`eva.config`](../../../conf/pipeline/eager/eva.config) which has been pre-configured with a setup suitable for the MPI-EVA cluster.
+
+Example: `nextflow run nf-core/eager -profile eva`
+
+## eager specific configurations for eva
+
+Specific configurations for eva have been made for eager.
+
+### General profiles
+
+- The general MPI-EVA profile runs with default nf-core/eager parameters, but with modifications to account for issues SGE has with Java tools.
+
+#### big_data
+
+- This defines larger base computing resources for when working with very deeply sequenced or high-endogenous samples.
+
+### Contextual profiles
+
+#### Human Pop-Gen
+
+- `human`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16500, -n 0.01`)
+
+#### Pathogen
+
+- `pathogen_loose`: optimised for mapping of pathogen aDNA reads (i.e. bwa aln defaults as `-l 16, -n 0.01`)
+- `pathogen_strict`: optimised for mapping of pathogen aDNA reads (i.e. bwa aln defaults as `-l 32, -n 0.1`)
--- a/docs/pipeline/eager/mpcdf.config
+++ b/docs/pipeline/eager/mpcdf.config
@ -0,0 +1,11 @@
+# nf-core/configs: mpcdf eager specific configuration
+
+Extra specific configuration for eager pipeline for the `cobra` cluster of the MPCDF
+
+## Usage
+
+To use, run the pipeline with `-profile mpcdf,cobra`.
+
+This will download and launch the eager specific [`mpcdf.config`](../../../conf/pipeline/eager/mpcdf.config) which has been pre-configured with a setup suitable for the mpcdf cluster.
+
+Currently this only applies to the `cobra` cluster, where maximum resources are adjusted accordingly.
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@ -25,6 +25,7 @@ profiles {
  czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
  ebc          { includeConfig "${params.custom_config_base}/conf/ebc.config" }
  eddie        { includeConfig "${params.custom_config_base}/conf/eddie.config" }
+  eva          { includeConfig "${params.custom_config_base}/conf/eva.config" }
  icr_davros   { includeConfig "${params.custom_config_base}/conf/icr_davros.config" }
  ifb_core     { includeConfig "${params.custom_config_base}/conf/ifb_core.config" }
  imperial     { includeConfig "${params.custom_config_base}/conf/imperial.config" }
--- a/pipeline/eager.config
+++ b/pipeline/eager.config
@ -11,5 +11,5 @@
 profiles {
  shh { includeConfig "${params.custom_config_base}/conf/pipeline/eager/shh.config" }
  mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/eager/mpcdf.config" }
-
+  eva { includeConfig "${params.custom_config_base}/conf/pipeline/eager/eva.config" }
 }