1
0
Fork 0
mirror of https://github.com/MillironX/nf-configs.git synced 2024-12-22 02:38:16 +00:00

Merge branch 'master' into eva

This commit is contained in:
James A. Fellows Yates 2021-04-06 17:51:46 +02:00 committed by GitHub
commit 832edaa450
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 448 additions and 39 deletions

View file

@ -114,6 +114,7 @@ Currently documentation is available for the following systems:
* [GOOGLE](docs/google.md)
* [HEBBE](docs/hebbe.md)
* [ICR_DAVROS](docs/icr_davros.md)
* [JAX](docs/jax.md)
* [KRAKEN](docs/kraken.md)
* [MPCDF](docs/mpcdf.md)
* [MUNIN](docs/munin.md)

View file

@ -7,10 +7,22 @@ params {
awsqueue = false
awsregion = 'eu-west-1'
awscli = '/home/ec2-user/miniconda/bin/aws'
tracedir = './'
}
timeline {
overwrite = true
}
report {
overwrite = true
}
trace {
overwrite = true
}
dag {
overwrite = true
}
process.executor = 'awsbatch'
process.queue = params.awsqueue
aws.region = params.awsregion
executor.awscli = params.awscli
aws.batch.cliPath = params.awscli

View file

@ -2,13 +2,13 @@
params {
config_profile_description = 'CLIP BATCH ENVIRONMENT (CBE) cluster profile provided by nf-core/configs'
config_profile_contact = 'Patrick Hüther (@phue)'
config_profile_url = 'http://www.gmi.oeaw.ac.at/'
config_profile_url = 'https://clip.science'
}
process {
executor = 'slurm'
queue = { task.memory <= 170.GB ? 'c' : 'm' }
clusterOptions = { task.time <= 8.h ? '--qos short': task.time <= 48.h ? '--qos medium' : '--qos long' }
clusterOptions = { task.time <= 1.h ? '--qos rapid' : task.time <= 8.h ? '--qos short': task.time <= 48.h ? '--qos medium' : '--qos long' }
module = 'anaconda3/2019.10'
}

View file

@ -13,7 +13,7 @@ params {
singularity {
enabled = true
runOptions = "-B /mnt"
runOptions = "-B /mnt -B /work_ifs"
}
executor {
@ -30,7 +30,7 @@ process {
params {
// illumina iGenomes reference file paths on DX Cluster
igenomes_base = '/mnt/ld_ng_out/sukmb352/references/iGenomes/references/'
igenomes_base = '/work_ifs/ikmb_repository/references/iGenomes/references/'
saveReference = true
max_memory = 250.GB
max_cpus = 20

50
conf/eddie.config Normal file
View file

@ -0,0 +1,50 @@
//Profile config names for nf-core/configs
// Metadata describing this profile: description, contact person and reference URL.
params {
    config_profile_url         = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'
    config_profile_contact     = 'Alison Meynert (@ameynert)'
    config_profile_description = 'University of Edinburgh (eddie) cluster profile provided by nf-core/configs.'
}
// Executor selection: SGE, with queueSize set to "100".
executor {
    queueSize = "100"
    name      = "sge"
}
// Default process settings for this profile.
process {
// When task.memory is set, pass "-l h_vmem=<task memory in bytes divided by task.cpus>";
// otherwise no extra cluster options are passed (null).
clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus}" : null }
scratch = true
// Use the "sharedmem" parallel environment only when a task has more than one CPU.
penv = { task.cpus > 1 ? "sharedmem" : null }
// common SGE error statuses
// Exit statuses in the list below select 'retry'; every other status selects 'finish'.
errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
maxErrors = '-1'
maxRetries = 3
// Shell snippet assigned to beforeScript: sources the modules init script, loads the
// Singularity 3.5.3 module and exports SINGULARITY_TMPDIR from $TMPDIR
// (the backslash in \$TMPDIR stops Groovy from interpolating it inside the string).
beforeScript =
"""
. /etc/profile.d/modules.sh
module load 'roslin/singularity/3.5.3'
export SINGULARITY_TMPDIR="\$TMPDIR"
"""
}
// Reference-genome location and the max_* resource values for this profile.
params {
    // Resource maxima
    max_cpus      = 32
    max_memory    = 384.GB
    max_time      = 240.h

    // iGenomes reference base
    igenomes_base = '/exports/igmm/eddie/NextGenResources/igenomes'
    saveReference = true
}
// Environment variables defined by this profile.
env {
    MALLOC_ARENA_MAX = 1
}
// Singularity settings: enabled with auto-mounts, run with '-p', and
// SINGULARITY_TMPDIR listed in envWhitelist.
singularity {
    enabled      = true
    autoMounts   = true
    runOptions   = '-p'
    envWhitelist = "SINGULARITY_TMPDIR"
}

24
conf/ifb_core.config Normal file
View file

@ -0,0 +1,24 @@
//Profile config names for nf-core/configs
// Metadata describing this profile: description, contact and reference URL.
params {
    config_profile_url         = 'https://www.france-bioinformatique.fr/'
    config_profile_contact     = 'https://community.france-bioinformatique.fr'
    config_profile_description = 'The IFB core cluster profile'
}
// Container settings: Singularity is switched on and run with '-B /shared'.
singularity {
    runOptions = '-B /shared'
    // need one image per execution
    enabled    = true
}
// All processes are submitted through the Slurm executor.
process {
    executor = "slurm"
}
// Reference handling and max_* resource values for this profile.
params {
igenomes_ignore = true
// Max resources requested by a normal node.
// NOTE(review): this comment originally named "genotoul", apparently carried over
// from another profile — confirm these limits match IFB core nodes.
max_memory = 240.GB
max_cpus = 28
max_time = 96.h
}

25
conf/jax.config Normal file
View file

@ -0,0 +1,25 @@
// Profile metadata plus the Singularity cache location, which the singularity
// scope reads back as params.singularity_cache_dir.
params {
    singularity_cache_dir      = '/fastscratch/singularity_cache_nfcore'
    config_profile_url         = 'https://jacksonlaboratory.sharepoint.com/sites/ResearchIT/SitePages/Welcome-to-Sumner.aspx'
    config_profile_contact     = 'Asaf Peer (@peera)'
    config_profile_description = 'The Jackson Laboratory Sumner HPC profile provided by nf-core/configs.'
}
// Queue size applied to the slurm executor only (scoped form of executor.$slurm.queueSize).
executor {
    $slurm {
        queueSize = 250
    }
}
// Process defaults: Slurm executor, "compute" queue, and a '-q' option chosen by task time.
process {
    executor       = "slurm"
    queue          = "compute"
    module         = "slurm"
    beforeScript   = 'module load singularity'
    // Tasks requesting under 72 hours get '-q batch'; everything else gets '-q long'.
    clusterOptions = {
        task.time < 72.h ? '-q batch' : '-q long'
    }
}
// Singularity settings; the cache directory comes from params.singularity_cache_dir.
singularity {
    cacheDir   = params.singularity_cache_dir
    autoMounts = true
    enabled    = true
}
// max_* resource values for this profile.
params {
    max_cpus   = 70
    max_memory = 768.GB
    max_time   = 336.h
}

View file

@ -0,0 +1,15 @@
// Per-process overrides: each listed process passes an h_vmem value computed
// from (task.memory + 4.GB) in bytes divided by task.cpus.
process {
    withName: 'PICARD_MARKDUPLICATES' {
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }

    withName: 'QUALIMAP_RNASEQ' {
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }

    withName: 'FASTQC' {
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }
}

View file

@ -0,0 +1,27 @@
// Per-process overrides. Most entries only raise the h_vmem value to
// (task.memory + 4.GB) in bytes divided by task.cpus; a few also pin cpus or memory.
process {
    // CPU count only
    withName: 'MapReads' {
        cpus = 16
    }

    // Single CPU plus the 4 GB h_vmem margin
    withName: 'BuildDict' {
        cpus           = 1
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }

    // Fixed cpus and memory, with an 8 GB h_vmem margin
    withName: 'BamQC' {
        cpus           = 8
        memory         = 128.GB
        clusterOptions = { "-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus}" }
    }

    // 4 GB h_vmem margin only
    withName: 'MarkDuplicates' {
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }

    withName: 'BaseRecalibrator' {
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }

    withName: 'ApplyBQSR' {
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }

    withName: 'GatherBQSRReports' {
        clusterOptions = { "-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}" }
    }
}

View file

@ -4,17 +4,41 @@
* -------------------------------------------------
* Defines viral reference genomes for all environments.
*/
params {
// Genome reference file paths
genomes {
'NC_045512.2' {
fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz"
gff = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz"
// This version of the reference has been kept here for backwards compatibility.
// Please use 'MN908947.3' if possible because all primer sets are available / have been pre-prepared relative to that assembly
fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz'
}
'MN908947.3' {
fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz"
gff = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz"
fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz'
primer_sets {
artic {
'1' {
fasta = 'https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V1/nCoV-2019.reference.fasta'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz'
primer_bed = 'https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V1/nCoV-2019.primer.bed'
scheme = 'nCoV-2019'
}
'2' {
fasta = 'https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V2/nCoV-2019.reference.fasta'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz'
primer_bed = 'https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V2/nCoV-2019.primer.bed'
scheme = 'nCoV-2019'
}
'3' {
fasta = 'https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.reference.fasta'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz'
primer_bed = 'https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.primer.bed'
scheme = 'nCoV-2019'
}
}
}
}
}
}

View file

@ -1,10 +1,10 @@
//Profile config names for nf-core/configs
params {
config_profile_description = 'Generic MPI-SHH cluster(s) profile provided by nf-core/configs.'
config_profile_description = 'Generic MPI-SHH SDAG cluster profile provided by nf-core/configs.'
config_profile_contact = 'James Fellows Yates (@jfy133), Maxime Borry (@Maxibor)'
config_profile_url = 'https://shh.mpg.de'
max_memory = 256.GB
max_cpus = 32
max_memory = 2.TB
max_cpus = 128
max_time = 720.h
//Illumina iGenomes reference file path
igenomes_base = "/projects1/public_data/igenomes/"
@ -13,14 +13,9 @@ params {
// Perform work directory cleanup after a successful run
cleanup = true
singularity {
enabled = true
autoMounts = true
cacheDir = "/projects1/singularity_scratch/cache/"
}
process {
executor = 'slurm'
queue = { task.memory > 756.GB || task.cpus > 64 ? 'supercruncher': task.time <= 2.h ? 'short' : task.time <= 48.h ? 'medium': 'long' }
}
executor {
@ -28,24 +23,10 @@ executor {
}
profiles {
cdag {
params {
config_profile_description = 'CDAG MPI-SHH profile, provided by nf-core/configs.'
}
// delete when CDAG will be fixed
process {
queue = 'long'
}
}
sdag {
params {
config_profile_description = 'SDAG MPI-SHH profile, provided by nf-core/configs.'
max_memory = 2.TB
max_cpus = 128
}
process {
queue = { task.memory > 756.GB || task.cpus > 64 ? 'supercruncher': task.time <= 2.h ? 'short' : task.time <= 48.h ? 'medium': 'long' }
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
singularity.cacheDir = "/projects1/singularity_scratch/cache/"
}
// Profile to deactivate automatic cleanup of work directory after a successful run. Overwrites cleanup option.
debug {

View file

@ -5,6 +5,11 @@ params {
config_profile_url = 'http://docs.oithpc.utdallas.edu/'
}
env {
TMPDIR = '/home/$USER/scratch/tmp'
SINGULARITY_CACHEDIR = '/home/$USER/scratch/tmp'
}
singularity {
enabled = true
envWhitelist='SINGULARITY_BINDPATH'
@ -15,6 +20,28 @@ process {
beforeScript = 'module load singularity/3.2.1'
executor = 'slurm'
queue = { task.memory >= 32.GB && task.cpu <= 12 ? 'Kim': task.memory <= 24.GB && task.cpu <= 8 ? 'smallmem' : 'genomics' }
withName:TRIMGALORE {
memory = 31.GB
}
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 6.h * task.attempt, 'time' ) }
}
withLabel:process_medium {
cpus = { check_max( 16 * task.attempt, 'cpus' ) }
memory = { check_max( 31.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }
}
withLabel:process_high {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 120.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
}
params {

28
conf/wcm.config Normal file
View file

@ -0,0 +1,28 @@
// Location of the Singularity images for this profile.
// NOTE(review): nothing in the visible part of this file reads singularityDir;
// the singularity scope repeats the same path as a literal — confirm whether this is still needed.
singularityDir = "/athena/elementolab/scratch/reference/.singularity/singularity_images_nextflow"
// Profile metadata and the iGenomes reference location.
params {
    igenomes_base              = '/athena/elementolab/scratch/reference/igenomes'
    config_profile_contact     = 'Ashley Stephen Doane, PhD (@DoaneAS)'
    config_profile_description = 'Weill Cornell Medicine, Scientific Computing Unit Slurm cluster profile provided by nf-core/configs'
}
// Singularity settings: enabled with auto-mounts, a fixed image cache directory,
// and SINGULARITY_BINDPATH listed in envWhitelist.
singularity {
    enabled      = true
    autoMounts   = true
    cacheDir     = "/athena/elementolab/scratch/reference/.singularity/singularity_images_nextflow"
    envWhitelist = 'SINGULARITY_BINDPATH'
}
// Process defaults: Slurm executor, 'panda_physbio' queue, and a scratch path
// under /scratchLocal.
process {
    executor = 'slurm'
    queue    = 'panda_physbio'
    // A preceding `scratch = true` was redundant: this later assignment to the same
    // key overrides it, so only the path is kept. Single quotes keep the backticks
    // and ${SLURM_JOBID} literal in the configured value rather than interpolating
    // them in Groovy.
    scratch  = '/scratchLocal/`whoami`_${SLURM_JOBID}'
}
// max_* resource values for this profile.
params {
    max_cpus   = 8
    max_memory = 32.GB
    max_time   = 24.h
}

View file

@ -1,4 +1,6 @@
# nf-core/configs: awsbatch Configuration
To be used with `awsbatch`.
Custom queue, region and CLI path can be supplied with `params.awsqueue`, `params.awsregion`, `params.awscli`, respectively.
Custom queue, region and AWS CLI path can be supplied with `params.awsqueue`, `params.awsregion` and `params.awscli`, respectively.
The profile enables `overwrite` for `trace`, `timeline`, `report` and `dag` so that pipelines can be resumed.

104
docs/eddie.md Normal file
View file

@ -0,0 +1,104 @@
# nf-core/configs: Eddie Configuration
nf-core pipelines sarek, rnaseq, and atacseq have all been tested on the University of Edinburgh Eddie HPC.
## Getting help
There is a Slack channel dedicated to eddie users on the MRC IGMM Slack: [https://igmm.slack.com/channels/eddie3](https://igmm.slack.com/channels/eddie3)
## Using the Eddie config profile
To use, run the pipeline with `-profile eddie` (one hyphen).
This will download and launch the [`eddie.config`](../conf/eddie.config) which has been pre-configured with a setup suitable for the [University of Edinburgh Eddie HPC](https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing).
The configuration file supports running nf-core pipelines with Docker containers running under Singularity by default. Conda is not currently supported.
```bash
nextflow run nf-core/PIPELINE -profile eddie # ...rest of pipeline flags
```
Before running the pipeline you will need to install Nextflow or load it from the module system. Generally the most recent version will be the one you want. If you want to run a Nextflow pipeline that is based on [DSL2](https://www.nextflow.io/docs/latest/dsl2.html), you will need a version that ends with '-edge'.
To list versions:
```bash
module avail igmm/apps/nextflow
```
To load the most recent version:
```bash
module load igmm/apps/nextflow
```
This config enables Nextflow to manage the pipeline jobs via the SGE job scheduler and using Singularity for software management.
## Singularity set-up
Load Singularity from the module system and, if you have access to `/exports/igmm/eddie/NextGenResources`, set the Singularity cache directory to the NextGenResources path for the pipeline and version you want to run. If this does not exist, please contact the [IGMM Data Manager](mailto:data.manager@igmm.ed.ac.uk) to have it added. You can add these lines to the file `$HOME/.bashrc`, or you can run these commands before you run an nf-core pipeline.
If you do not have access to `/exports/igmm/eddie/NextGenResources`, set the Singularity cache directory to somewhere sensible that is not in your `$HOME` area (which has limited space). It will take time to download all the Singularity containers, but you can use this again.
```bash
module load singularity
export NXF_SINGULARITY_CACHEDIR="/exports/igmm/eddie/NextGenResources/nextflow/singularity/nf-core-rnaseq_v3.0"
```
Singularity will create a directory `.singularity` in your `$HOME` directory on eddie. Space on `$HOME` is very limited, so it is a good idea to create a directory somewhere else with more room and link the locations.
```bash
cd $HOME
mkdir /exports/eddie/path/to/my/area/.singularity
ln -s /exports/eddie/path/to/my/area/.singularity .singularity
```
## Running Nextflow
### On a login node
You can use a qlogin to run Nextflow, if you request more than the default 2GB of memory. Unfortunately you can't submit the initial Nextflow run process as a job as you can't qsub within a qsub.
```bash
qlogin -l h_vmem=8G
```
If your eddie terminal disconnects your Nextflow job will stop. You can run Nextflow as a bash script on the command line using `nohup` to prevent this.
```bash
nohup ./nextflow_run.sh &
```
### On a wild west node - IGMM only
Wild west nodes on eddie can be accessed via ssh (node2c15, node2c16, node3g22). To run Nextflow on one of these nodes, do it within a [screen session](https://linuxize.com/post/how-to-use-linux-screen/).
Start a new screen session.
```bash
screen -S <session_name>
```
List existing screen sessions
```bash
screen -ls
```
Reconnect to an existing screen session
```bash
screen -r <session_name>
```
## Using iGenomes references
A local copy of the iGenomes resource has been made available on the Eddie HPC for those with access to `/exports/igmm/eddie/NextGenResources` so you should be able to run the pipeline against any reference available in the `igenomes.config`.
You can do this by simply using the `--genome <GENOME_ID>` parameter.
## Adjusting maximum resources
This config is set for IGMM standard nodes which have 32 cores and 384GB memory. If you are a non-IGMM user, please see the [ECDF specification](https://www.wiki.ed.ac.uk/display/ResearchServices/Memory+Specification) and adjust the `--clusterOptions` flag appropriately, e.g.
```bash
--clusterOptions "-C mem256GB" --max_memory "256GB"
```

40
docs/ifb_core.md Normal file
View file

@ -0,0 +1,40 @@
# nf-core/configs: IFB core Configuration
All nf-core pipelines have been successfully configured for use on the cluster of the IFB (Institut Français de Bioinformatique).
To use, run the pipeline with `-profile ifb_core`. This will download and launch the [`ifb_core.config`](../conf/ifb_core.config) which has been pre-configured with a setup suitable for the IFB core cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
## How to use on IFB core
Before running the pipeline you will need to load Nextflow using the environment module system on IFB core. You can do this by issuing the commands below:
```bash
# Login to a compute node
srun --pty bash
## Load Nextflow and Singularity environment modules
module purge
module load nextflow/20.04.1
# Run a downloaded/git-cloned nextflow workflow from
nextflow run \
nf-core/workflow \
-resume \
-profile ifb_core \
--email my-email@example.org \
-c my-specific.config
...
# Or use the nf-core client
nextflow run nf-core/rnaseq ...
```
## Databanks
Local copies of several genomes are available in the `/shared/bank` directory. See
our [databank page](https://ifb-elixirfr.gitlab.io/cluster/doc/banks/)
to search for your favorite genome.
>NB: You will need an account to use the HPC cluster on IFB core in order to run the pipeline. If in doubt, contact IT or go to the [account page](https://my.cluster.france-bioinformatique.fr/manager2/login).

8
docs/jax.md Normal file
View file

@ -0,0 +1,8 @@
# nf-core/configs: JAX Configuration
All nf-core pipelines have been successfully configured for use on the JAX Sumner cluster at The Jackson Laboratory.
To use, run the pipeline with `-profile jax`. This will download and launch the [`jax.config`](../conf/jax.config) which has been pre-configured with a setup suitable for the JAX Sumner cluster. Using this profile, a docker image containing all of the required software will be downloaded and converted to a Singularity image before execution of the pipeline, and jobs will be submitted through Slurm.
>NB: You will need an account to use the HPC cluster JAX in order to run the pipeline. If in doubt contact IT.
>NB: Nextflow should not be executed on the login nodes. If in doubt contact IT.

24
docs/wcm.md Normal file
View file

@ -0,0 +1,24 @@
# nf-core/configs: Weill Cornell Medicine Configuration
All nf-core pipelines have been successfully configured for use on the panda cluster at the WCM.
To use, run the pipeline with `-profile wcm`. This will download and launch the [`wcm.config`](../conf/wcm.config) which has been pre-configured with a setup suitable for the WCM slurm cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
## Running the workflow on the WCM cluster
Nextflow is not installed by default on the WCM cluster.
- Install Nextflow : [here](https://www.nextflow.io/docs/latest/getstarted.html#)
Nextflow manages each process as a separate job that is submitted to the cluster by using the `sbatch` command.
Nextflow shouldn't run directly on a login node but on a compute node or lab-specific interactive server when configured as a submit host.
1. Run nextflow on a compute node or interactive server with submit host capability:
```bash
# Run nextflow workflow
nextflow run \
nf-core/chipseq \
-resume \
-profile test,wcm
```

View file

@ -23,8 +23,10 @@ profiles {
crick { includeConfig "${params.custom_config_base}/conf/crick.config" }
czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" }
eddie { includeConfig "${params.custom_config_base}/conf/eddie.config" }
eva { includeConfig "${params.custom_config_base}/conf/eva.config" }
icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" }
ifb_core { includeConfig "${params.custom_config_base}/conf/ifb_core.config" }
imperial { includeConfig "${params.custom_config_base}/conf/imperial.config" }
imperial_mb { includeConfig "${params.custom_config_base}/conf/imperial_mb.config" }
genotoul { includeConfig "${params.custom_config_base}/conf/genotoul.config" }
@ -46,6 +48,7 @@ profiles {
uppmax { includeConfig "${params.custom_config_base}/conf/uppmax.config" }
utd_ganymede { includeConfig "${params.custom_config_base}/conf/utd_ganymede.config" }
uzh { includeConfig "${params.custom_config_base}/conf/uzh.config" }
jax { includeConfig "${params.custom_config_base}/conf/jax.config" }
}
// If user hostnames contain one of these substring and they are

13
pipeline/rnaseq.config Normal file
View file

@ -0,0 +1,13 @@
/*
* -------------------------------------------------
* nf-core/rnaseq custom profile Nextflow config file
* -------------------------------------------------
* Config options for custom environments.
* Cluster-specific config options should be saved
* in the conf/pipeline/rnaseq folder and imported
* under a profile name here.
*/
// Pipeline-specific profiles: each entry includes the matching cluster config
// from conf/pipeline/rnaseq under params.custom_config_base.
profiles {
    eddie {
        includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/eddie.config"
    }
}

View file

@ -12,4 +12,5 @@ profiles {
munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" }
uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" }
icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" }
eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" }
}