1
0
Fork 0
mirror of https://github.com/MillironX/nf-configs.git synced 2024-11-25 01:19:54 +00:00

Merge branch 'master' into bgrande/sage-aws

This commit is contained in:
Bruno Grande 2022-08-25 16:29:20 -07:00 committed by GitHub
commit 9c8cb71bc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 291 additions and 70 deletions

View file

@ -8,5 +8,5 @@ trim_trailing_whitespace = true
indent_size = 4
indent_style = space
[*.{md,yml,yaml}]
[*.{md,yml,yaml,cff}]
indent_size = 2

View file

@ -66,6 +66,7 @@ jobs:
- "jax"
- "lugh"
- "marvin"
- "medair"
- "mjolnir_globe"
- "maestro"
- "mpcdf"

56
CITATION.cff Normal file
View file

@ -0,0 +1,56 @@
# Citation metadata in Citation File Format (CFF) 1.2.0.
# The top-level keys describe the software release; `preferred-citation`
# points citing tools at the peer-reviewed article that should be cited instead.
cff-version: 1.2.0
message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication"
authors:
  - family-names: Ewels
    given-names: Philip
  - family-names: Peltzer
    given-names: Alexander
  - family-names: Fillinger
    given-names: Sven
  - family-names: Patel
    given-names: Harshil
  - family-names: Alneberg
    given-names: Johannes
  - family-names: Wilm
    given-names: Andreas
  - family-names: Ulysse Garcia
    given-names: Maxime
  - family-names: Di Tommaso
    given-names: Paolo
  - family-names: Nahnsen
    given-names: Sven
title: "The nf-core framework for community-curated bioinformatics pipelines."
version: 2.4.1
doi: 10.1038/s41587-020-0439-x
date-released: 2022-05-16
url: https://github.com/nf-core/tools
# NOTE: the key must be spelled `preferred-citation`; a misspelled key is not
# recognised by the CFF schema and the article reference would be ignored.
preferred-citation:
  type: article
  authors:
    - family-names: Ewels
      given-names: Philip
    - family-names: Peltzer
      given-names: Alexander
    - family-names: Fillinger
      given-names: Sven
    - family-names: Patel
      given-names: Harshil
    - family-names: Alneberg
      given-names: Johannes
    - family-names: Wilm
      given-names: Andreas
    - family-names: Ulysse Garcia
      given-names: Maxime
    - family-names: Di Tommaso
      given-names: Paolo
    - family-names: Nahnsen
      given-names: Sven
  doi: 10.1038/s41587-020-0439-x
  journal: nature biotechnology
  start: 276
  end: 278
  title: "The nf-core framework for community-curated bioinformatics pipelines."
  issue: 3
  volume: 38
  year: 2020
  url: https://dx.doi.org/10.1038/s41587-020-0439-x

View file

@ -120,6 +120,7 @@ Currently documentation is available for the following systems:
- [LUGH](docs/lugh.md)
- [MAESTRO](docs/maestro.md)
- [MARVIN](docs/marvin.md)
- [MEDAIR](docs/medair.md)
- [MJOLNIR_GLOBE](docs/mjolnir_globe.md)
- [MPCDF](docs/mpcdf.md)
- [MUNIN](docs/munin.md)

View file

@ -16,19 +16,29 @@ singularity {
params {
max_memory = 180.GB
max_cpus = 36
max_time = 336.h
max_time = 336.h
}
process {
executor = 'slurm'
clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''}" }
clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''}" }
}
profiles {
dev_prio {
stub_prio {
params {
priority = 'development'
clusterOptions = "--qos=low"
max_memory = 6.GB
max_cpus = 1
max_time = 1.h
}
}
dev_prio {
params {
priority = 'development'
clusterOptions = "--qos=low"
}
}

46
conf/medair.config Normal file
View file

@ -0,0 +1,46 @@
// Profile config names for nf-core/configs
params {
    config_profile_description = 'Cluster profile for medair (local cluster of Clinical Genomics Gothenburg)'
    config_profile_contact = 'Clinical Genomics, Gothenburg (cgg-rd@gu.se, cgg-it@gu.se)'
    config_profile_url = 'https://www.scilifelab.se/units/clinical-genomics-goteborg/'
}

// Nextflow parameters: run containers through Singularity, using the shared image cache
singularity {
    enabled = true
    cacheDir = "/apps/bio/dependencies/nf-core/singularities"
}

// Queue-specific sub-profiles, selected with e.g. `-profile medair,wgs`.
// Resource caps live in a `params` scope and scheduler settings in a `process`
// scope: fully qualified names such as `params.max_cpus` written inside a
// `process { }` block are resolved relative to that block and would not set
// the top-level values.
profiles {

    // `wgs.q` queue, jobs capped at 48 hours
    wgs {
        params {
            max_cpus = 40
            max_time = 48.h
            max_memory = 128.GB
        }
        process {
            queue = 'wgs.q'
            executor = 'sge'
            penv = 'mpi'
            clusterOptions = '-l excl=1'
        }
    }

    // `production.q` queue, jobs capped at 480 hours
    production {
        params {
            max_cpus = 40
            max_time = 480.h
            max_memory = 128.GB
        }
        process {
            queue = 'production.q'
            executor = 'sge'
            penv = 'mpi'
            clusterOptions = '-l excl=1'
        }
    }
}

// Specific parameter for pipelines that can use Sentieon (e.g. nf-core/sarek, nf-core/raredisease)
process {
    withLabel:'sentieon' {
        container = "/apps/bio/singularities/sentieon-211204-peta.simg"
    }
}

View file

@ -61,7 +61,7 @@ profiles {
params {
config_profile_description = 'MPCDF raven profile (unofficially) provided by nf-core/configs.'
memory = 2000000.MB
max_memory = 2000000.MB
max_cpus = 72
max_time = 24.h
}

View file

@ -6,11 +6,15 @@ params {
config_profile_description = 'nf-core/eager EVA profile provided by nf-core/configs'
}
env {
_JAVA_OPTIONS = "-XX:ParallelGCThreads=1"
OPENBLAS_NUM_THREADS = 1
OMP_NUM_THREADS = 1
}
// Specific nf-core/eager process configuration
process {
beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1"'
maxRetries = 2
// Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion
@ -69,17 +73,17 @@ process {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
}
withName: fastqc_after_clipping {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
}
withName: adapter_removal {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
}
withName: bwa {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga())}G,h=!(bionode01|bionode02|bionode03|bionode04|bionode05|bionode06)" }
}
@ -188,26 +192,18 @@ process {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
}
withName:get_software_versions {
cache = false
clusterOptions = { "-S /bin/bash -V -l h=!(bionode06)" }
beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -Xmx512m"; export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1'
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toMega())}M" }
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toMega() * 8)}M" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
}
withName:eigenstrat_snp_coverage {
beforeScript = 'export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1'
}
withName:kraken_merge {
beforeScript = 'export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1'
}
withName:multiqc {
beforeScript = 'export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1;'
clusterOptions = { "-S /bin/bash -V -j y -o output.log -l h_vmem=${task.memory.toGiga() * 2}G" }
}
}
profiles {
@ -226,8 +222,6 @@ profiles {
process {
beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1"'
maxRetries = 2
// Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion
@ -279,7 +273,7 @@ profiles {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 3)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
}
withName: fastqc_after_clipping {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 3)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
@ -404,8 +398,6 @@ profiles {
process {
beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1"'
maxRetries = 2
// Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion
@ -457,7 +449,7 @@ profiles {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }
}
withName: fastqc_after_clipping {
clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" }
errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' }

View file

@ -1,5 +1,5 @@
process {
withName:'PICARD_MARKDUPLICATES' {
memory = { check_max( 90.GB * task.attempt, 'memory' ) }
}
@ -7,7 +7,26 @@ process {
cpus = { check_max( 16 * task.attempt, 'cpus' ) }
memory = { check_max( 80.GB * task.attempt, 'memory' ) }
}
withName:'QUALIMAP_BAMQC' {
ext.args = { "--java-mem-size=${task.memory.giga / 1.15 as long}G" }
withLabel:'sentieon' {
beforeScript = { "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202112.02/bin" }
}
}
withName: 'BCFTOOLS_VIEW' {
if (params.genome == 'GRCh37') {
ext.args = '--output-type z --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "'
} else if (params.genome == 'GRCh38') {
ext.args = '--output-type z --apply-filters PASS --exclude "INFO/swegen_FRQ > 0.40"'
}
publishDir = [
enabled: false,
]
}
// Java memory fixes
withName:'QUALIMAP_BAMQC' {
clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? "--mem ${task.memory.mega * 1.15 as long}M" : ''}" }
}
withName:'PICARD_MARKDUPLICATES' {
clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? "--mem ${task.memory.mega * 1.15 as long}M" : ''}" }
}
}

View file

@ -8,23 +8,27 @@
params {
// Genome reference file paths
genomes {
// SARS-CoV-2
'NC_045512.2' {
// This version of the reference has been kept here for backwards compatibility.
// Please use 'MN908947.3' if possible because all primer sets are available / have been pre-prepared relative to that assembly
fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz'
nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz'
nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz'
nextclade_dataset_name = 'sars-cov-2'
nextclade_dataset_reference = 'MN908947'
nextclade_dataset_tag = '2022-01-18T12:00:00Z'
nextclade_dataset_tag = '2022-06-14T12:00:00Z'
}
// SARS-CoV-2
'MN908947.3' {
fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz'
nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz'
nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz'
nextclade_dataset_name = 'sars-cov-2'
nextclade_dataset_reference = 'MN908947'
nextclade_dataset_tag = '2022-01-18T12:00:00Z'
nextclade_dataset_tag = '2022-06-14T12:00:00Z'
primer_sets {
artic {
'1' {
@ -66,5 +70,28 @@ params {
}
}
}
// Monkeypox
'NC_063383.1' {
fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCF_014621545.1_ASM1462154v1_genomic.220824.fna.gz'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCF_014621545.1_ASM1462154v1_genomic.220824.gff.gz'
nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/nextclade_hMPXV_NC_063383.1_2022-08-19T12_00_00Z.tar.gz'
nextclade_dataset_name = 'hMPXV'
nextclade_dataset_reference = 'NC_063383.1'
nextclade_dataset_tag = '2022-08-19T12:00:00Z'
}
// Monkeypox
'ON563414.3' {
fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/ON563414.3/GCA_023516015.3_ASM2351601v1_genomic.220824.fna.gz'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/ON563414.3/GCA_023516015.3_ASM2351601v1_genomic.220824.gff.gz'
}
// Monkeypox
'MT903344.1' {
fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MT903344.1/GCA_014621585.1_ASM1462158v1_genomic.220824.fna.gz'
gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MT903344.1/GCA_014621585.1_ASM1462158v1_genomic.220824.gff.gz'
}
}
}

View file

@ -1,35 +1,33 @@
// Profile details
params {
config_profile_description = 'The Wellcome Sanger Institute HPC cluster profile'
config_profile_contact = 'Anthony Underwood (@aunderwo)'
config_profile_url = 'https://www.sanger.ac.uk/group/informatics-support-group/'
}
singularity {
enabled = true
cacheDir = "${baseDir}/singularity"
runOptions = '--bind /lustre --bind /nfs/pathnfs01 --bind /nfs/pathnfs02 --bind /nfs/pathnfs03 --bind /nfs/pathnfs04 --bind /nfs/pathnfs05 --bind /nfs/pathnfs06 --no-home'
config_profile_description = 'The Wellcome Sanger Institute HPC cluster (farm5) profile'
config_profile_contact = 'Priyanka Surana (@priyanka-surana)'
config_profile_url = 'https://www.sanger.ac.uk'
}
// Queue and retry strategy
process{
executor = 'lsf'
queue = 'normal'
errorStrategy = { task.attempt <= 5 ? "retry" : "finish" }
process.maxRetries = 5
withLabel:process_long {
queue = 'long'
}
executor = 'lsf'
queue = { task.time < 12.h ? 'normal' : task.time < 48.h ? 'long' : 'basement' }
errorStrategy = 'retry'
maxRetries = 5
}
// Executor details
executor{
name = 'lsf'
perJobMemLimit = true
poolSize = 4
submitRateLimit = '5 sec'
killBatchSize = 50
name = 'lsf'
perJobMemLimit = true
poolSize = 4
submitRateLimit = '5 sec'
killBatchSize = 50
}
// Max resources
params {
max_memory = 128.GB
max_cpus = 64
max_time = 48.h
max_memory = 683.GB
max_cpus = 256
max_time = 720.h
}
// For singularity
singularity.runOptions = '--bind /lustre --bind /nfs'

View file

@ -7,9 +7,9 @@ workDir = "$scratch_dir/work"
// Perform work directory cleanup when the run has successfully completed
// cleanup = true
// Reduce the job submit rate to about 10 per second, this way the server won't be bombarded with jobs
// Reduce the job submit rate to about 3 per second, this way the server won't be bombarded with jobs
executor {
submitRateLimit = '10 sec'
submitRateLimit = '3 sec'
}
// Specify that singularity should be used and where the cache dir will be for the images

70
docs/medair.md Normal file
View file

@ -0,0 +1,70 @@
# nf-core/configs: Medair Configuration
All nf-core pipelines have been successfully configured for use on the Medair cluster at Clinical Genomics Gothenburg.
To use, run the pipeline with `-profile medair`. This will download and launch the [`medair.config`](../conf/medair.config) which has been pre-configured with a setup suitable for the Medair cluster.
It will enable Nextflow to manage the pipeline jobs via the `SGE` job scheduler.
Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
You will need an account to use the Medair cluster in order to download or run pipelines. If in doubt, contact cgg-it.
## Download nf-core pipelines
### Set-up: load Nextflow and nf-core tools
First you need to load the relevant software: Nextflow and nf-core tools. You can do it as follows:
```bash
## Load Nextflow
module load nextflow
## Load nf-core tools
module load miniconda
source activate nf-core
```
### Storage of Singularity images
When downloading an nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. We chose to have a central location for these images on medair: `/apps/bio/dependencies/nf-core/singularities`.
For Nextflow to know where to store new images, run or add the following to your `.bashrc`:
```bash
export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities"
```
> Comment: This was also added to cronuser.
### Download a pipeline
We have started to download pipelines in the following location: `/apps/bio/repos/nf-core/`
Use the `nf-core download --singularity-cache-only` command to start a download. It will open an interactive menu. Choose `singularity` for the software container image, and `none` for the compression type.
## Run nf-core pipelines
Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands below will have to be executed on one of the login nodes. If in doubt contact cgg-it (cgg-it[at]gu.se).
### Set-up: load Nextflow and Singularity
Before running a pipeline you will need to load Nextflow and Singularity using the environment module system on Medair. You can do this by issuing the commands below:
```bash
## Load Nextflow and Singularity environment modules
module purge
module load nextflow
module load singularity
```
### Choose a profile
Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 days, versus 2 days for the `wgs` profile).
For example, the following job would run with the `wgs` profile:
```bash
nextflow run nf-core/raredisease -profile medair,wgs
```
### Sentieon
In some pipelines (sarek, raredisease) it is possible to use Sentieon for alignment and variant calling. If one uses the label `sentieon` for running a process, the config file contains the path to the Sentieon singularity image on Medair.

View file

@ -2,8 +2,6 @@
To use, run the pipeline with `-profile sanger`. This will download and launch the [`sanger.config`](../conf/sanger.config) which has been
pre-configured with a setup suitable for the Wellcome Sanger Institute LSF cluster.
Using this profile, either a docker image containing all of the required software will be downloaded, and converted to a Singularity image or
a Singularity image downloaded directly before execution of the pipeline.
## Running the workflow on the Wellcome Sanger Institute cluster
@ -14,10 +12,12 @@ The latest version of Nextflow is not installed by default on the cluster. You w
A recommended place to move the `nextflow` executable to is `~/bin` so that it's in the `PATH`.
Nextflow manages each process as a separate job that is submitted to the cluster by using the `bsub` command.
Since the Nextflow pipeline will submit individual jobs for each process to the cluster and dependencies will be provided bu Singularity images you shoudl make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file
If asking Nextflow to use Singularity to run the individual jobs,
you should make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file
```bash
[[ -f /software/pathogen/farm5 ]] && module load ISG/singularity
[[ -f /software/modules/ISG/singularity ]] && module load ISG/singularity
```
Nextflow shouldn't run directly on the submission node but on a compute node.
@ -26,16 +26,16 @@ To do so make a shell script with a similar structure to the following code and
```bash
#!/bin/bash
#BSUB -o /path/to/a/log/dir/%J.o
#BSUB -e /path/to/a/log/dir//%J.e
#BSUB -e /path/to/a/log/dir/%J.e
#BSUB -M 8000
#BSUB -q long
#BSUB -n 4
#BSUB -q oversubscribed
#BSUB -n 2
export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128'
export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128'
export NXF_ANSI_LOG=false
export NXF_OPTS="-Xms8G -Xmx8G -Dnxf.pool.maxThreads=2000"
export NXF_VER=21.04.0-edge
export NXF_VER=22.04.0-5697
nextflow run \

View file

@ -49,6 +49,7 @@ profiles {
lugh { includeConfig "${params.custom_config_base}/conf/lugh.config" }
maestro { includeConfig "${params.custom_config_base}/conf/maestro.config" }
marvin { includeConfig "${params.custom_config_base}/conf/marvin.config" }
medair { includeConfig "${params.custom_config_base}/conf/medair.config" }
mjolnir_globe { includeConfig "${params.custom_config_base}/conf/mjolnir_globe.config" }
mpcdf { includeConfig "${params.custom_config_base}/conf/mpcdf.config" }
munin { includeConfig "${params.custom_config_base}/conf/munin.config" }