Merge pull request #224 from jfy133/eva

Add EVA profile for MPI-EVA (clean)
2024-11-22 08:29:54 +00:00 · 2021-04-07 19:56:00 +02:00 · 2021-04-07 19:56:00 +02:00 · a2a47fdecc
commit a2a47fdecc
parent 91f50aad4e 161c3a33d4
10 changed files with 350 additions and 2 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -16,7 +16,7 @@ jobs:
    needs: test_all_profiles
    strategy:
        matrix:
-          profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh']
+          profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh']
    steps:
      - uses: actions/checkout@v1
      - name: Install Nextflow
--- a/README.md
+++ b/README.md
@ -108,6 +108,7 @@ Currently documentation is available for the following systems:
 * [CZBIOHUB_AWS](docs/czbiohub.md)
 * [DENBI_QBIC](docs/denbi_qbic.md)
 * [EBC](docs/ebc.md)
 * [EVA](docs/eva.md)
 * [GENOTOUL](docs/genotoul.md)
 * [GENOUEST](docs/genouest.md)
 * [GIS](docs/gis.md)
@ -176,6 +177,7 @@ Currently documentation is available for the following pipelines within specific
  * [UPPMAX](docs/pipeline/ampliseq/uppmax.md)
 * eager
  * [SHH](docs/pipeline/eager/shh.md)
  * [EVA](docs/pipeline/eager/eva.md)
 * rnafusion
  * [MUNIN](docs/pipeline/rnafusion/munin.md)
 * sarek
--- a/conf/eva.config
+++ b/conf/eva.config
@ -0,0 +1,51 @@
 // nf-core/configs profile metadata: maintainer contact, institute URL and description
 params {
  config_profile_contact = 'James Fellows Yates (@jfy133)'
  config_profile_url = 'https://eva.mpg.de'
  config_profile_description = 'Generic MPI-EVA cluster(s) profile provided by nf-core/configs.'
 }
 // Perform work directory cleanup after a successful run
 // (the `debug` profile defined in this file switches this back off)
 cleanup = true
 // Container settings: Singularity is switched on, with automatic mounts enabled
 singularity {
    autoMounts = true
    enabled = true
 }
 // Default job submission settings: `all.q` queue, `smp` parallel environment, SGE executor
 process {
    queue = 'all.q'
    penv = 'smp'
    executor = 'sge'
 }
 // Limit the number of jobs Nextflow keeps queued/running at once to 8
 executor {
    queueSize = 8
 }
 // Additional sub-profiles, used together with the base profile (e.g. `-profile eva,archgen`)
 profiles {
    // Settings for the `archgen.q` queue: resource ceilings, iGenomes location and singularity cache
    archgen {
      params {
        // Illumina iGenomes reference file path
        igenomes_base = "/projects1/public_data/igenomes/"
        config_profile_description = 'MPI-EVA archgen profile, provided by nf-core/configs.'
        // Upper bounds for per-job resource requests
        max_memory = 256.GB
        max_cpus = 32
        max_time = 720.h
      }
      process {
        // Overrides the default `all.q` queue
        queue = 'archgen.q'
      }
      singularity {
        // Directory in which singularity images are cached
        cacheDir = "/mnt/archgen/users/singularity_scratch"
      }
    }
    // Profile to deactivate automatic cleanup of the work directory after a successful run.
    // Overrides the top-level `cleanup = true` option.
    debug {
      cleanup = false
    }
 }
--- a/conf/pipeline/eager/eva.config
+++ b/conf/pipeline/eager/eva.config
@ -0,0 +1,215 @@
 // nf-core/configs profile metadata for the nf-core/eager specific EVA configuration
 params {
  // Description first, then the maintainer to contact about this profile
  config_profile_description = 'nf-core/eager EVA profile provided by nf-core/configs'
  config_profile_contact = 'James Fellows Yates (@jfy133)'
 }
 // Specific nf-core/eager process configuration.
 // Sets per-label CPU/memory/time requests and per-process SGE memory requests.
 process {
  // Export JVM options before every task: a single parallel GC thread, and print the JVM command line flags
  beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -XX:+PrintCommandLineFlags"'
  // Together with the `task.attempt` checks below this allows up to three attempts per task
  maxRetries = 2
  // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toGiga conversion
  // Default: request h_vmem and virtual_free equal to the task memory (in whole GB)
  clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toGiga()}G,virtual_free=${task.memory.toGiga()}G" }
  // For every label below: memory scales linearly with the attempt number (capped by check_max),
  // and wall time escalates per attempt: 2.h (first), 48.h (second), 1440.h (third).
  // NOTE(review): `time` is not passed through check_max, so it can exceed `max_time` — confirm this is intended
  withLabel:'sc_tiny'{
    cpus = { check_max( 1, 'cpus' ) }
    memory = { check_max( 1.GB * task.attempt, 'memory' ) }
    time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
  }
  withLabel:'sc_small'{
    cpus = { check_max( 1, 'cpus' ) }
    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
    time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
  }
  withLabel:'sc_medium'{
    cpus = { check_max( 1, 'cpus' ) }
    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
    time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
  }
  withLabel:'mc_small'{
    cpus = { check_max( 2, 'cpus' ) }
    memory = { check_max( 4.GB * task.attempt, 'memory' ) }
    time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
  }
  withLabel:'mc_medium' {
    cpus = { check_max( 4, 'cpus' ) }
    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
    time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
  }
  withLabel:'mc_large'{
     cpus = { check_max( 8, 'cpus' ) }
     memory = { check_max( 16.GB * task.attempt, 'memory' ) }
     time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
  }
  withLabel:'mc_huge'{
     cpus = { check_max( 32, 'cpus' ) }
     memory = { check_max( 256.GB * task.attempt, 'memory' ) }
     time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
   }
  // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use.
  // Each override below requests more h_vmem/virtual_free from SGE than the task memory:
  // either double the task memory, or the task memory plus a fixed 3 GB / 6 GB of headroom.
  // Note these overrides replace the default clusterOptions entirely (no `-j y -o output.log`).
  withName: makeSeqDict {
    // Task memory + 3 GB; additionally passes JAVA_OPTS to the job environment via `-v`
    clusterOptions = { "-S /bin/bash -v JAVA_OPTS='-XX:ParallelGCThreads=1' -l h_vmem=${(task.memory.toGiga() + 3)}G,virtual_free=${(task.memory.toGiga() + 3)}G" }
   }
  withName: fastqc {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: adapter_removal {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: dedup {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: markduplicates {
    // Task memory + 6 GB
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
   }
  withName: malt {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: maltextract {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: multivcfanalyzer {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: mtnucratio {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: vcf2genome {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: qualimap {
    // Task memory + 6 GB
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
   }
  withName: damageprofiler {
    // Task memory + 6 GB
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" }
   }
  withName: circularmapper {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: circulargenerator {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
   }
  withName: preseq {
    clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" }
    // Retry only on the listed exit statuses; any other failure of this process is ignored
    errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' }
   }
 }
 // Optional nf-core/eager sub-profiles, used in addition to the base `eva` profile
 profiles {
   // Larger base computing resources for very deeply sequenced or high-endogenous samples.
   // Compared with the default process settings in this file: memory per label is doubled
   // and fewer jobs are queued at once.
   big_data {
     params {
       // Specific nf-core/configs params
       config_profile_contact = 'James Fellows Yates (@jfy133)'
       config_profile_description = 'nf-core/eager big-data EVA profile provided by nf-core/configs'
     }
     executor {
       queueSize = 6
     }
     process {
       maxRetries = 2
       // Fixed long wall time for this process, independent of the attempt number
       withName:hostremoval_input_fastq {
         cpus = { check_max( 1, 'cpus' ) }
         memory = { check_max( 32.GB * task.attempt, 'memory' ) }
         time = 1440.h
       }
       // For every label below: memory scales with the attempt number (capped by check_max),
       // and wall time escalates per attempt: 2.h (first), 48.h (second), 1440.h (third)
       withLabel:'sc_tiny'{
         cpus = { check_max( 1, 'cpus' ) }
         memory = { check_max( 2.GB * task.attempt, 'memory' ) }
         time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
       }
       withLabel:'sc_small'{
         cpus = { check_max( 1, 'cpus' ) }
         memory = { check_max( 8.GB * task.attempt, 'memory' ) }
         time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
       }
       withLabel:'sc_medium'{
         cpus = { check_max( 1, 'cpus' ) }
         memory = { check_max( 16.GB * task.attempt, 'memory' ) }
         time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
       }
       withLabel:'mc_small'{
         cpus = { check_max( 2, 'cpus' ) }
         memory = { check_max( 8.GB * task.attempt, 'memory' ) }
         time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
       }
       withLabel:'mc_medium' {
         cpus = { check_max( 4, 'cpus' ) }
         memory = { check_max( 16.GB * task.attempt, 'memory' ) }
         time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
       }
       withLabel:'mc_large'{
         cpus = { check_max( 8, 'cpus' ) }
         memory = { check_max( 32.GB * task.attempt, 'memory' ) }
         time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
       }
       withLabel:'mc_huge'{
         cpus = { check_max( 32, 'cpus' ) }
         memory = { check_max( 512.GB * task.attempt, 'memory' ) }
         time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h }
       }
     }
   }
   // Contextual mapping profiles: each sets the bwa aln `-n` (bwaalnn) and `-l` (bwaalnl) defaults
   pathogen_loose {
     params {
       config_profile_description = 'Pathogen (loose) MPI-EVA profile, provided by nf-core/configs.'
       bwaalnn = 0.01
       bwaalnl = 16
     }
   }
   pathogen_strict {
     params {
       config_profile_description = 'Pathogen (strict) MPI-EVA profile, provided by nf-core/configs.'
       bwaalnn = 0.1
       bwaalnl = 32
     }
   }
   human {
     params {
       config_profile_description = 'Human MPI-EVA profile, provided by nf-core/configs.'
       bwaalnn = 0.01
       bwaalnl = 16500
     }
   }
 }
--- a/conf/pipeline/eager/shh.config
+++ b/conf/pipeline/eager/shh.config
@ -20,6 +20,12 @@ process {
    queue = { task.memory > 756.GB ? 'supercruncher' : 'long' }
  }
  withName: circulargenerator {
      cpus = { check_max( 1, 'cpus' ) }
      memory = { check_max( 4.GB * task.attempt, 'memory' ) }
      time = { check_max( 4.h * task.attempt, 'time' ) }
  }
  withLabel:'sc_tiny'{
    cpus = { check_max( 1, 'cpus' ) }
    memory = { check_max( 1.GB * task.attempt, 'memory' ) }
--- a/docs/eva.md
+++ b/docs/eva.md
@ -0,0 +1,28 @@
 # nf-core/configs: EVA Configuration
 All nf-core pipelines have been successfully configured for use on the clusters of the Department of Genetics and the Department of Archaeogenetics at the [Max Planck Institute for Evolutionary Anthropology (MPI-EVA)](http://eva.mpg.de).
 To use, run the pipeline with `-profile eva`. You can further optimise submissions by specifying which cluster queue you are using, e.g. `-profile eva,archgen`. This will download and launch the [`eva.config`](../conf/eva.config), which has been pre-configured with a setup suitable for the `all.q` queue. The number of parallel jobs that run is currently limited to 8.
 Using this profile, a docker image containing all of the required software will be downloaded, and converted to a `singularity` image before execution of the pipeline. The image will currently be centrally stored here:
 ## Additional Profiles
 We currently also offer profiles for the department-specific nodes.
 ### archgen
 If you specify `-profile eva,archgen` you will be able to use the nodes available on the `archgen.q` queue.
 Note the following characteristics of this profile:
 - By default, job resources are assigned a maximum number of CPUs of 32, 256 GB maximum memory and 720.h maximum wall time.
 - Using this profile will currently store singularity images in a cache under `/mnt/archgen/users/singularity_scratch` (the `cacheDir` set in [`eva.config`](../conf/eva.config)). All archgen users currently have read/write access to this directory; however, this will likely change to a read-only directory in the future that will be managed by the IT team.
 - Intermediate files will be _automatically_ cleaned up (see `debug` below if you don't want this to happen) on successful run completion.
 >NB: You will need an account and VPN access to use the cluster at MPI-EVA in order to run the pipeline. If in doubt contact the IT team.
 >NB: Nextflow will need to submit the jobs via SGE to the clusters and as such the commands above will have to be executed on one of the head nodes. If in doubt contact IT.
 ### debug
 This simple profile just turns off automatic clean up of intermediate files. This can be useful for debugging. Specify e.g. with `-profile eva,archgen,debug`.
--- a/docs/pipeline/eager/eva.md
+++ b/docs/pipeline/eager/eva.md
@ -0,0 +1,34 @@
 # nf-core/configs: eva eager specific configuration
 Extra specific configuration for eager pipeline
 ## Usage
 To use, run the pipeline with `-profile eva`.
 This will download and launch the eager specific [`eva.config`](../../../conf/pipeline/eager/eva.config) which has been pre-configured with a setup suitable for the MPI-EVA cluster.
 Example: `nextflow run nf-core/eager -profile eva`
 ## eager specific configurations for eva
 Specific configurations for eva have been made for eager.
 ### General profiles
 - The general MPI-EVA profile runs with default nf-core/eager parameters, but with modifications to account for issues SGE has with Java tools.
 #### big_data
 - This defines larger base computing resources for when working with very deeply sequenced or high-endogenous samples.
 ### Contextual profiles
 #### Human Pop-Gen
 - `human`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16500 -n 0.01`)
 #### Pathogen
 - `pathogen_loose`: optimised for loose mapping of pathogen aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`)
 - `pathogen_strict`: optimised for strict mapping of pathogen aDNA reads (i.e. bwa aln defaults as `-l 32 -n 0.1`)
--- a/docs/pipeline/eager/mpcdf.config
+++ b/docs/pipeline/eager/mpcdf.config
@ -0,0 +1,11 @@
 # nf-core/configs: mpcdf eager specific configuration
 Extra specific configuration for eager pipeline for the `cobra` cluster of the MPCDF
 ## Usage
 To use, run the pipeline with `-profile mpcdf,cobra`.
 This will download and launch the eager specific [`mpcdf.config`](../../../conf/pipeline/eager/mpcdf.config) which has been pre-configured with a setup suitable for the mpcdf cluster.
 Currently this only applies to the `cobra` cluster, where maximum resources are adjusted accordingly.
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@ -25,6 +25,7 @@ profiles {
  czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
  ebc          { includeConfig "${params.custom_config_base}/conf/ebc.config" }
  eddie        { includeConfig "${params.custom_config_base}/conf/eddie.config" }
  eva          { includeConfig "${params.custom_config_base}/conf/eva.config" }
  icr_davros   { includeConfig "${params.custom_config_base}/conf/icr_davros.config" }
  ifb_core     { includeConfig "${params.custom_config_base}/conf/ifb_core.config" }
  imperial     { includeConfig "${params.custom_config_base}/conf/imperial.config" }
--- a/pipeline/eager.config
+++ b/pipeline/eager.config
@ -11,5 +11,5 @@
 profiles {
  shh { includeConfig "${params.custom_config_base}/conf/pipeline/eager/shh.config" }
  mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/eager/mpcdf.config" }
-
+  eva { includeConfig "${params.custom_config_base}/conf/pipeline/eager/eva.config" }
 }