diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 487c7ee..66bf36b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -49,6 +49,7 @@ jobs: - "cfc_dev" - "cheaha" - "computerome" + - "crg" - "crick" - "crukmi" - "denbi_qbic" @@ -62,11 +63,14 @@ jobs: - "google" - "hasta" - "hebbe" + - "hki" - "icr_davros" - "ifb_core" - "imperial" - "jax" + - "ku_sund_dangpu" - "lugh" + - "mana" - "marvin" - "medair" - "mjolnir_globe" @@ -83,7 +87,10 @@ jobs: - "sage" - "sahmri" - "sanger" + - "sbc_sharc" - "seg_globe" + - "tigem" + - "ucl_myriad" - "uct_hpc" - "unibe_ibu" - "uppmax" diff --git a/README.md b/README.md index 445467b..a066053 100644 --- a/README.md +++ b/README.md @@ -118,11 +118,14 @@ Currently documentation is available for the following systems: - [GOOGLE](docs/google.md) - [HASTA](docs/hasta.md) - [HEBBE](docs/hebbe.md) +- [HKI](docs/hki.md) - [ICR_DAVROS](docs/icr_davros.md) - [IMPERIAL](docs/imperial.md) - [JAX](docs/jax.md) +- [KU SUND DANGPU](docs/ku_sund_dangpu.md) - [LUGH](docs/lugh.md) - [MAESTRO](docs/maestro.md) +- [Mana](docs/mana.md) - [MARVIN](docs/marvin.md) - [MEDAIR](docs/medair.md) - [MJOLNIR_GLOBE](docs/mjolnir_globe.md) @@ -137,7 +140,10 @@ Currently documentation is available for the following systems: - [ROSALIND](docs/rosalind.md) - [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) +- [SBC_SHARC](docs/sbc_sharc.md) - [SEG_GLOBE](docs/seg_globe.md) +- [TIGEM](docs/tigem.md) +- [UCL_MYRIAD](docs/ucl_myriad.md) - [UCT_HPC](docs/uct_hpc.md) - [UNIBE_IBU](docs/unibe_ibu.md) - [UPPMAX](docs/uppmax.md) @@ -197,6 +203,12 @@ Currently documentation is available for the following pipelines within specific - ampliseq - [BINAC](docs/pipeline/ampliseq/binac.md) - [UPPMAX](docs/pipeline/ampliseq/uppmax.md) +- atacseq + - [SBC_SHARC](docs/pipeline/atacseq/sbc_sharc.md) +- chipseq + - [SBC_SHARC](docs/pipeline/chipseq/sbc_sharc.md) +- demultiplex + - [AWS_TOWER](docs/pipeline/demultiplex/aws_tower.md) - eager - [EVA](docs/pipeline/eager/eva.md) - mag @@ -204,15 +216,20 @@ Currently documentation is available for the following pipelines within specific - rnafusion - [HASTA](docs/pipeline/rnafusion/hasta.md) - [MUNIN](docs/pipeline/rnafusion/munin.md) +- rnaseq + - [SBC_SHARC](docs/pipeline/rnaseq/sbc_sharc.md) - rnavar - [MUNIN](docs/pipeline/rnavar/munin.md) - sarek - [Cancer Research UK Manchester Institute](docs/pipeline/sarek/crukmi.md) - [MUNIN](docs/pipeline/sarek/munin.md) + - [SBC_SHARC](docs/pipeline/sarek/sbc_sharc.md) - [UPPMAX](docs/pipeline/sarek/uppmax.md) - taxprofiler - [EVA](docs/pipeline/taxprofiler/eva.md) - [hasta](docs/pipeline/taxprofiler/hasta.md) +- proteinfold + - [CRG](docs/pipeline/proteinfold/crg.md) ### Pipeline-specific documentation diff --git a/conf/biohpc_gen.config b/conf/biohpc_gen.config index e3f4069..694a25f 100755 --- a/conf/biohpc_gen.config +++ b/conf/biohpc_gen.config @@ -11,9 +11,8 @@ env { process { executor = 'slurm' - queue = { task.memory <= 1536.GB ? (task.time > 2.d || task.memory > 384.GB ? 'biohpc_gen_production' : 'biohpc_gen_normal') : 'biohpc_gen_highmem' } - beforeScript = 'module use /dss/dsslegfs02/pn73se/pn73se-dss-0000/spack/modules/x86_avx2/linux*' - module = 'charliecloud/0.22:miniconda3' + queue = { task.memory <= 1536.GB ? (task.time > 2.d || task.memory > 384.GB ? 
'biohpc_gen_production' : 'biohpc_gen_normal') : 'biohpc_gen_highmem' } + module = 'charliecloud/0.25' } charliecloud { @@ -21,7 +20,7 @@ charliecloud { } params { - params.max_time = 14.d - params.max_cpus = 80 + params.max_time = 14.d + params.max_cpus = 80 params.max_memory = 3.TB } diff --git a/conf/ccga_med.config b/conf/ccga_med.config index d55fde2..c9b7b44 100644 --- a/conf/ccga_med.config +++ b/conf/ccga_med.config @@ -13,8 +13,8 @@ params { singularity { enabled = true - runOptions = "-B /work_ifs -B /scratch" - cacheDir = "/work_ifs/ikmb_repository/singularity_cache/" + runOptions = "-B /work_ifs -B /scratch -B /work_beegfs" + cacheDir = "/work_beegfs/ikmb_repository/singularity_cache/" } executor { @@ -31,7 +31,7 @@ process { params { // illumina iGenomes reference file paths on RZCluster - igenomes_base = '/work_ifs/ikmb_repository/references/iGenomes/references/' + igenomes_base = '/work_beegfs/ikmb_repository/references/iGenomes/references/' saveReference = true max_memory = 250.GB max_cpus = 24 diff --git a/conf/cfc.config b/conf/cfc.config index 1948e14..999e8d6 100644 --- a/conf/cfc.config +++ b/conf/cfc.config @@ -12,7 +12,7 @@ singularity { process { executor = 'slurm' - queue = { task.memory > 60.GB || task.cpus > 20 ? 'qbic' : 'compute' } + queue = 'qbic' scratch = 'true' } diff --git a/conf/cfc_dev.config b/conf/cfc_dev.config index 1d61baf..87caf66 100644 --- a/conf/cfc_dev.config +++ b/conf/cfc_dev.config @@ -11,7 +11,7 @@ singularity { process { executor = 'slurm' - queue = { task.memory > 60.GB || task.cpus > 20 ? 'qbic' : 'compute' } + queue = 'qbic' scratch = 'true' } @@ -25,4 +25,4 @@ params { max_memory = 1999.GB max_cpus = 128 max_time = 140.h -} \ No newline at end of file +} diff --git a/conf/cheaha.config b/conf/cheaha.config index 58963b5..ec79f10 100644 --- a/conf/cheaha.config +++ b/conf/cheaha.config @@ -9,7 +9,7 @@ params { } env { - TMPDIR="$USER" + TMPDIR="$scratch_dir" SINGULARITY_TMPDIR="$scratch_dir" } diff --git a/conf/crg.config b/conf/crg.config new file mode 100755 index 0000000..7216b80 --- /dev/null +++ b/conf/crg.config @@ -0,0 +1,14 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Centre for Genomic Regulation (CRG) cluster profile provided by nf-core/configs' + config_profile_contact = 'Athanasios Baltzis (@athbaltzis)' + config_profile_url = 'http://www.linux.crg.es/index.php/Main_Page' +} + +process { + executor = 'crg' +} + +singularity { + enabled = true +} diff --git a/conf/crukmi.config b/conf/crukmi.config index 4823585..778935b 100644 --- a/conf/crukmi.config +++ b/conf/crukmi.config @@ -5,11 +5,8 @@ params { config_profile_url = 'http://scicom.picr.man.ac.uk/projects/user-support/wiki' } -env { - SINGULARITY_CACHEDIR = '/lmod/nextflow_software' -} - singularity { + cacheDir = '/lmod/nextflow_software' enabled = true autoMounts = true } @@ -22,6 +19,11 @@ process { maxErrors = '-1' maxRetries = 3 + withLabel:process_single { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 5.GB * task.attempt, 'memory' ) } + } + withLabel:process_low { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 5.GB * task.attempt, 'memory' ) } diff --git a/conf/hki.config b/conf/hki.config new file mode 100644 index 0000000..63718bf --- /dev/null +++ b/conf/hki.config @@ -0,0 +1,104 @@ +params { + config_profile_description = 'HKI clusters profile provided by nf-core/configs.' 
+ config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_url = 'https://leibniz-hki.de' +} + +profiles { + apate { + params { + config_profile_description = 'apate HKI cluster profile provided by nf-core/configs' + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_url = 'https://leibniz-hki.de' + max_memory = 128.GB + max_cpus = 32 + max_time = 1440.h + } + process { + executor = 'local' + maxRetries = 2 + } + + executor { + queueSize = 8 + } + + singularity { + enabled = true + autoMounts = true + cacheDir = '/Net/Groups/ccdata/apps/singularity' + } + + conda { + cacheDir = '/Net/Groups/ccdata/apps/conda_envs' + } + + cleanup = true + } + + aither { + params { + config_profile_description = 'aither HKI cluster profile provided by nf-core/configs' + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_url = 'https://leibniz-hki.de' + max_memory = 128.GB + max_cpus = 32 + max_time = 1440.h + } + process { + executor = 'local' + maxRetries = 2 + } + + executor { + queueSize = 8 + } + + singularity { + enabled = true + autoMounts = true + cacheDir = '/Net/Groups/ccdata/apps/singularity' + } + + conda { + cacheDir = '/Net/Groups/ccdata/apps/conda_envs' + } + + cleanup = true + } + + arges { + params { + config_profile_description = 'arges HKI cluster profile provided by nf-core/configs' + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_url = 'https://leibniz-hki.de' + max_memory = 64.GB + max_cpus = 12 + max_time = 1440.h + } + process { + executor = 'local' + maxRetries = 2 + } + + executor { + queueSize = 8 + } + + singularity { + enabled = true + autoMounts = true + cacheDir = '/Net/Groups/ccdata/apps/singularity' + } + + conda { + cacheDir = '/Net/Groups/ccdata/apps/conda_envs' + } + + cleanup = true + } + + debug { + cleanup = false + } +} diff --git a/conf/ku_sund_dangpu.config b/conf/ku_sund_dangpu.config new file mode 100644 index 0000000..51ca462 --- /dev/null +++ b/conf/ku_sund_dangpu.config @@ -0,0 +1,25 @@ +params { + config_profile_contact = 'Adrija Kalvisa ' + config_profile_description = 'dangpufl01 configuration' + config_profile_url = '' + + // General cpus/memory/time requirements + max_cpus = 30 + max_memory = 200.GB + max_time = 72.h +} + +process { + executor = 'slurm' + +} + +executor { + queueSize = 5 +} + +singularity { + enabled = true + autoMounts = true + runOptions = '--bind /projects:/projects' +} \ No newline at end of file diff --git a/conf/mana.config b/conf/mana.config new file mode 100644 index 0000000..93d674c --- /dev/null +++ b/conf/mana.config @@ -0,0 +1,21 @@ +params { + config_profile_description = 'University of Hawaii at Manoa' + config_profile_url = 'http://www.hawaii.edu/its/ci/' + config_profile_contact = 'Cedric Arisdakessian' + + max_memory = 400.GB + max_cpus = 96 + max_time = 72.h +} + +process { + executor = 'slurm' + queue = 'shared,exclusive,kill-shared,kill-exclusive' + module = 'tools/Singularity' +} + +singularity { + enabled = true + cacheDir = "$HOME/.singularity_images_cache" + autoMounts = true +} diff --git a/conf/munin.config b/conf/munin.config index 5f794d6..0fca214 100644 --- a/conf/munin.config +++ b/conf/munin.config @@ -29,7 +29,7 @@ process { singularity { cacheDir = '/data1/containers/' enabled = true - runOptions = "--bind /media/BTB_2021_01" + //runOptions = "--bind /media/BTB_2021_01" } // To use docker, use nextflow run -profile munin,docker diff --git a/conf/nihbiowulf.config b/conf/nihbiowulf.config index 6159f0d..92deee0 100644 
--- a/conf/nihbiowulf.config +++ b/conf/nihbiowulf.config @@ -28,6 +28,15 @@ profiles { executor.$slurm.submitRateLimit = '6/1min' process.clusterOptions = ' --gres=lscratch:600 --signal USR2@20' } + + standard { + process.executor = 'slurm' + executor.$slurm.pollInterval = '1 min' + executor.$slurm.queueStatInterval = '5 min' + executor.queueSize = 100 + executor.$slurm.submitRateLimit = '6/1min' + process.clusterOptions = ' --gres=lscratch:600 --signal USR2@20' + } } singularity { diff --git a/conf/pipeline/atacseq/sbc_sharc.config b/conf/pipeline/atacseq/sbc_sharc.config new file mode 100644 index 0000000..7cd0e4a --- /dev/null +++ b/conf/pipeline/atacseq/sbc_sharc.config @@ -0,0 +1,73 @@ +// Sheffield Bioinformatics Core Configuration Profile - ShARC +// Custom Pipeline Resource Config for nf-core/atacseq + +// process-specific resource requirements - reduced specification from those in atacseq/conf/base.config + +process { + + // error and retry handling + + errorStrategy = { task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish' } + maxRetries = 2 + + + // process labels + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} + diff --git a/conf/pipeline/chipseq/sbc_sharc.config b/conf/pipeline/chipseq/sbc_sharc.config new file mode 100644 index 0000000..0e66333 --- /dev/null +++ b/conf/pipeline/chipseq/sbc_sharc.config @@ -0,0 +1,73 @@ +// Sheffield Bioinformatics Core Configuration Profile - ShARC +// Custom Pipeline Resource Config for nf-core/chipseq + +// process-specific resource requirements - reduced specification from those in chipseq/conf/base.config + +process { + + // error and retry handling + + errorStrategy = { task.exitStatus in [143,137,104,134,139,140] ? 
'retry' : 'finish' } + maxRetries = 2 + + + // process labels + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} + diff --git a/conf/pipeline/demultiplex/aws_tower.config b/conf/pipeline/demultiplex/aws_tower.config new file mode 100644 index 0000000..520487f --- /dev/null +++ b/conf/pipeline/demultiplex/aws_tower.config @@ -0,0 +1,29 @@ +// Profile config names for nf-core/configs + +params { + // Specific nf-core/configs params + config_profile_contact = 'Edmund Miller(@emiller88)' + config_profile_description = 'nf-core/demultiplex AWS Tower profile provided by nf-core/configs' +} + +aws { + batch { + maxParallelTransfers = 24 + maxTransferAttempts = 3 + } + client { + maxConnections = 24 + uploadMaxThreads = 24 + maxErrorRetry = 3 + socketTimeout = 3600000 + uploadRetrySleep = 1000 + uploadChunkSize = 32.MB + } +} + +process { + withName: BASES2FASTQ { + cpus = 16 + memory = 48.GB + } +} diff --git a/conf/pipeline/mag/eva.config b/conf/pipeline/mag/eva.config index 81d8d0c..37a531d 100644 --- a/conf/pipeline/mag/eva.config +++ b/conf/pipeline/mag/eva.config @@ -4,6 +4,11 @@ params { config_profile_description = 'nf-core/mag EVA profile provided by nf-core/configs' } +env { + OPENBLAS_NUM_THREADS=1 + OMP_NUM_THREADS=1 +} + process { withName: FASTQC { diff --git a/conf/pipeline/proteinfold/crg.config b/conf/pipeline/proteinfold/crg.config new file mode 100644 index 0000000..f778d8c --- /dev/null +++ b/conf/pipeline/proteinfold/crg.config @@ -0,0 +1,27 @@ +profiles { + crg { + params { + config_profile_contact = 'Athanasios Baltzis (@athbaltzis)' + config_profile_description = 'nf-core/proteinfold CRG profile provided by nf-core/configs' + } + executor.name = 'crg' + process { + queue = 'short-sl7,long-sl7' + withName: 'RUN_AF2|RUN_AF2_PRED|COLABFOLD_BATCH' { + cpus = 1 + memory = "30 GB" + queue = params.use_gpu ? 'gpu' : 'long-sl7' + clusterOptions = { ( task.queue == 'gpu' ? 
'-l gpu=1' : '' ) } + } + withName: 'ARIA2' { + time = '12h' + } + withName: 'MMSEQS_COLABFOLDSEARCH' { + queue = 'mem_512' + memory = "100 GB" + cpus = 8 + time = '12h' + } + } + } +} diff --git a/conf/pipeline/rnaseq/sbc_sharc.config b/conf/pipeline/rnaseq/sbc_sharc.config new file mode 100644 index 0000000..d9d9878 --- /dev/null +++ b/conf/pipeline/rnaseq/sbc_sharc.config @@ -0,0 +1,77 @@ +// Sheffield Bioinformatics Core Configuration Profile - ShARC +// Custom Pipeline Resource Config for nf-core/rnaseq + +// process-specific resource requirements - reduced specification from those in rnaseq/conf/base.config + +process { + + // error and retry handling + + errorStrategy = { task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish' } + maxRetries = 2 + + + // process labels + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + + withLabel:process_high_memory { + memory = { check_max( 160.GB * task.attempt, 'memory' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} + diff --git a/conf/pipeline/sarek/sbc_sharc.config b/conf/pipeline/sarek/sbc_sharc.config new file mode 100644 index 0000000..3a7f5b9 --- /dev/null +++ b/conf/pipeline/sarek/sbc_sharc.config @@ -0,0 +1,111 @@ +// Sheffield Bioinformatics Core Configuration Profile - ShARC +// Custom Pipeline Resource Config for nf-core/sarek + +// process-specific resource requirements - reduced specification from those in sarek/conf/base.config + +process { + + // error and retry handling + + errorStrategy = { task.exitStatus in [143,137,104,134,139,140,247] ? 
'retry' : 'finish' } + maxRetries = 2 + + + // process labels + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 192.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_long { + time = { check_max( 12.h * task.attempt, 'time' ) } + } + + withLabel:process_high_memory { + memory = { check_max( 240.GB * task.attempt, 'memory' ) } + } + + + // process name + + withName:'BWAMEM1_MEM|BWAMEM2_MEM' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 192.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withName:'FASTP' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + } + + withName:'FASTQC|FASTP|MOSDEPTH|SAMTOOLS_CONVERT' { + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + } + + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|SAMTOOLS_STATS' { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + } + + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|GATK4_GATHERBQSRREPORTS' { + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + } + + withName:'GATK4_MARKDUPLICATES' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 240.GB * task.attempt, 'memory' ) } + time = { check_max( 12.h * task.attempt, 'time' ) } + } + + withName:'FREEBAYES|SAMTOOLS_STATS|SAMTOOLS_INDEX|UNZIP' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + } + +} + + +// function 'check_max()' to ensure that resource requirements don't go beyond maximum limit + +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min(obj, params.max_cpus as int) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/conf/pipeline/viralrecon/genomes.config b/conf/pipeline/viralrecon/genomes.config index 06c8577..20049a3 100644 --- a/conf/pipeline/viralrecon/genomes.config +++ b/conf/pipeline/viralrecon/genomes.config @@ -68,6 +68,20 @@ params { scheme = 'nCoV-2019' } } + 'NEB' { + 'vss1' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/primer_schemes/artic/nCoV-2019/V1200/nCoV-2019.reference.fasta' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz' + primer_bed = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/primer_schemes/NEB/nCoV-2019/vss1/neb_vss1.primer.bed' + scheme = 'nCoV-2019' + } + 'vsl1' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/primer_schemes/artic/nCoV-2019/V1200/nCoV-2019.reference.fasta' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz' + primer_bed = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/primer_schemes/NEB/nCoV-2019/vsl1/neb_vsl1.primer.bed' + scheme = 'nCoV-2019' + } + } } } diff --git a/conf/sage.config b/conf/sage.config index bfe1e09..c77f8fa 100644 --- a/conf/sage.config +++ b/conf/sage.config @@ -29,12 +29,18 @@ aws { region = "us-east-1" client { uploadChunkSize = 209715200 + uploadMaxThreads = 4 + } + batch { + maxParallelTransfers = 1 + maxTransferAttempts = 5 + delayBetweenAttempts = '120 sec' } } executor { name = 'awsbatch' // Ensure unlimited queue size on AWS Batch - queueSize = 100000 + queueSize = 500 // Slow down the rate at which AWS Batch jobs accumulate in // the queue (an attempt to prevent orphaned EBS volumes) submitRateLimit = '5 / 1 sec' diff --git a/conf/sbc_sharc.config b/conf/sbc_sharc.config new file mode 100644 index 0000000..91cf4e3 --- /dev/null +++ b/conf/sbc_sharc.config @@ -0,0 +1,59 @@ +// Sheffield Bioinformatics Core Configuration Profile - ShARC +// Base Institutional Configuration + + +// nf-core specific parameters displayed in header summary of each run + +params { + + config_profile_description = 'Sheffield Bioinformatics Core - ShARC' + config_profile_contact = 'Lewis Quayle (l.quayle@sheffield.ac.uk)' + config_profile_url = 'https://docs.hpc.shef.ac.uk/en/latest/sharc/index.html' + +} + + +// hpc resource limits + +params { + + max_cpus = 16 + max_memory = 256.GB + max_time = 96.h + +} + + +// hpc configuration specific to ShARC + +process { + + // scheduler + + executor = 'sge' + penv = 'smp' + queue = { task.time <= 6.h ? 'shortint.q' : 'all.q' } + clusterOptions = { "-l rmem=${ (task.memory.toGiga() / task.cpus) }G" } + +} + + +// optional executor settings + +executor { + + queueSize = 10 + submitRateLimit = '1 sec' + +} + + +// container engine + +singularity { + + enabled = true + autoMounts = true + +} + diff --git a/conf/tigem.config b/conf/tigem.config new file mode 100644 index 0000000..b89a4ed --- /dev/null +++ b/conf/tigem.config @@ -0,0 +1,14 @@ +params { + config_profile_description = 'Telethon Institute of Genetic and Medicine (TIGEM) provided by nf-core/configs.' 
+ config_profile_contact = 'Giuseppe Martone (@giusmar)' + config_profile_url = 'https://github.com/giusmar' +} + +process.executor = 'slurm' +google.zone = 'europe-west1' + +singularity { + enabled = true + autoMounts = true + cacheDir = 'work/singularity' +} diff --git a/conf/ucl_myriad.config b/conf/ucl_myriad.config new file mode 100644 index 0000000..3f9425c --- /dev/null +++ b/conf/ucl_myriad.config @@ -0,0 +1,34 @@ +params { + + config_profile_description = 'University College London Myriad cluster' + config_profile_contact = 'Chris Wyatt (ucbtcdr@ucl.ac.uk)' + config_profile_url = 'https://www.rc.ucl.ac.uk/docs/Clusters/Myriad/' + +} + +process { + executor='sge' + penv = 'smp' +} + +params { + // Defaults only, expecting to be overwritten + max_memory = 128.GB + max_cpus = 36 + max_time = 72.h + // igenomes_base = 's3://ngi-igenomes/igenomes/' +} + +// optional executor settings + +executor { + + queueSize = 10 + submitRateLimit = '1 sec' + +} + +singularity { + enabled = true + autoMounts = true +} \ No newline at end of file diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index 0bc6ffd..4a3733a 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -7,9 +7,17 @@ workDir = "$scratch_dir/work" // Perform work directory cleanup when the run has succesfully completed // cleanup = true -// Reduce the job submit rate to about 5 per second, this way the server won't be bombarded with jobs +// Reduce the job submit rate to about 30 per minute, this way the server won't be bombarded with jobs +// Limit queueSize to keep job rate under control and avoid timeouts executor { - submitRateLimit = '3 sec' + submitRateLimit = '30/1min' + queueSize = 50 +} + +// Add backoff strategy to catch cluster timeouts +process { + errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + maxRetries = 5 } // Specify that singularity should be used and where the cache dir will be for the images @@ -28,7 +36,7 @@ profiles { skitty { params { config_profile_description = 'HPC_SKITTY profile for use on the Skitty cluster of the VSC HPC.' - config_profile_contact = 'Nicolas Vannieuwkerke (@nvnieuwk)' + config_profile_contact = 'ict@cmgg.be' config_profile_url = 'https://www.ugent.be/hpc/en' max_memory = 177.GB max_cpus = 36 @@ -38,7 +46,6 @@ profiles { process { executor = 'slurm' queue = 'skitty' - maxRetries = 2 scratch = "$scratch_dir" } } @@ -46,7 +53,7 @@ profiles { swalot { params { config_profile_description = 'HPC_SWALOT profile for use on the Swalot cluster of the VSC HPC.' - config_profile_contact = 'Nicolas Vannieuwkerke (@nvnieuwk)' + config_profile_contact = 'ict@cmgg.be' config_profile_url = 'https://www.ugent.be/hpc/en' max_memory = 116.GB max_cpus = 20 @@ -56,7 +63,6 @@ profiles { process { executor = 'slurm' queue = 'swalot' - maxRetries = 2 scratch = "$scratch_dir" } } @@ -64,7 +70,7 @@ profiles { victini { params { config_profile_description = 'HPC_VICTINI profile for use on the Victini cluster of the VSC HPC.' - config_profile_contact = 'Nicolas Vannieuwkerke (@nvnieuwk)' + config_profile_contact = 'ict@cmgg.be' config_profile_url = 'https://www.ugent.be/hpc/en' max_memory = 88.GB max_cpus = 36 @@ -74,7 +80,6 @@ profiles { process { executor = 'slurm' queue = 'victini' - maxRetries = 2 scratch = "$scratch_dir" } } @@ -82,7 +87,7 @@ profiles { kirlia { params { config_profile_description = 'HPC_KIRLIA profile for use on the Kirlia cluster of the VSC HPC.' 
- config_profile_contact = 'Nicolas Vannieuwkerke (@nvnieuwk)' + config_profile_contact = 'ict@cmgg.be' config_profile_url = 'https://www.ugent.be/hpc/en' max_memory = 738.GB max_cpus = 36 @@ -92,7 +97,6 @@ profiles { process { executor = 'slurm' queue = 'kirlia' - maxRetries = 2 scratch = "$scratch_dir" } } @@ -100,7 +104,7 @@ profiles { doduo { params { config_profile_description = 'HPC_DODUO profile for use on the Doduo cluster of the VSC HPC.' - config_profile_contact = 'Nicolas Vannieuwkerke (@nvnieuwk)' + config_profile_contact = 'ict@cmgg.be' config_profile_url = 'https://www.ugent.be/hpc/en' max_memory = 250.GB max_cpus = 96 @@ -110,8 +114,7 @@ profiles { process { executor = 'slurm' queue = 'doduo' - maxRetries = 2 scratch = "$scratch_dir" } } -} \ No newline at end of file +} diff --git a/docs/biohpc_gen.md b/docs/biohpc_gen.md index c007cad..660e789 100644 --- a/docs/biohpc_gen.md +++ b/docs/biohpc_gen.md @@ -4,14 +4,12 @@ All nf-core pipelines have been successfully configured for use on the BioHPC Ge To use, run the pipeline with `-profile biohpc_gen`. This will download and launch the [`biohpc_gen.config`](../conf/biohpc_gen.config) which has been pre-configured with a setup suitable for the biohpc_gen cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Charliecloud container before execution of the pipeline. -Before running the pipeline you will need to load Nextflow and Charliecloud using the environment module system on biohpc_gen. You can do this by issuing the commands below: +Before running the pipeline you will need to load Nextflow and Charliecloud using the environment module system on a login node. You can do this by issuing the commands below: ```bash ## Load Nextflow and Charliecloud environment modules -module purge -module load nextflow charliecloud/0.22 +module load nextflow/21.04.3 charliecloud/0.25 ``` -> NB: Charliecloud support requires Nextflow version `21.03.0-edge` or later. > NB: You will need an account to use the LRZ Linux cluster as well as group access to the biohpc_gen cluster in order to run nf-core pipelines. > NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. diff --git a/docs/gis.md b/docs/gis.md new file mode 100644 index 0000000..022605e --- /dev/null +++ b/docs/gis.md @@ -0,0 +1,45 @@ +# nf-core/configs: GIS Aquila Configuration + +All nf-core pipelines have been successfully configured for use on the cluster of the GIS (Genome Institute of Singapore (Aquila)). + +To use, run the pipeline with `-profile gis`. This will download and launch the [`gis.config`](../conf/gis.config) which has been pre-configured with a setup suitable for the GIS Aquila cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +## How to use on GIS core + +Before running the pipeline you will need to load Nextflow using the environment module system on GIS Aquila. 
You can do this by issuing the commands below: + +```bash +# Login to a compute node +srun --pty bash + +## Load Nextflow and Singularity environment modules +module purge +source /mnt/projects/rpd/rc/init.2017-04 +module load miniconda3 + + +# Run a nextflow pipeline with dependencies bundled in a conda environment +set +u +source activate nfcore-rnaseq-1.0dev +set -u + +# Run a downloaded/git-cloned nextflow workflow from +nextflow run \\ +nf-core/workflow \\ +-resume \\ +-profile gis \\ +--email my-email@example.org \\ +-c my-specific.config +... + + +# Or use the nf-core client +nextflow run nf-core/rnaseq ... + +``` + +## Databanks + +A local copy of several genomes are available in `/mnt/projects/rpd/genomes.testing/S3_igenomes/` directory. + +> NB: You will need an account to use the HPC cluster on GIS in order to run the pipeline. If in doubt contact IT or go to [Andreas Wilm](https://github.com/andreas-wilm) diff --git a/docs/hki.md b/docs/hki.md new file mode 100644 index 0000000..de45c20 --- /dev/null +++ b/docs/hki.md @@ -0,0 +1,24 @@ +# nf-core/configs: HKI Configuration + +All nf-core pipelines have been successfully configured for use on clusters at the [Leibniz Institute for Natural Product Research and Infection Biology Hans Knöll Institute](https://www.leibniz-hki.de/en). + +To use, run the pipeline with `-profile hki,`. This will download and launch the [`hki.config`](../conf/hki.config) which contains specific profiles for each cluster. The number of parallel jobs that run is currently limited to 8. + +The currently available profiles are: + +- apate (uses singularity, cleanup set to true by default) +- arges (uses singularity, cleanup set to true by default) +- aither (uses singularity, cleanup set to true by default) +- debug (sets cleanup to false for debugging purposes, use e.g. `profile hki,,debug`) + +Note that Nextflow is not necessarily installed by default on the HKI HPC cluster(s). You will need to install it into a directory you have write access to. +Follow these instructions from the Nextflow documentation. + +- Install Nextflow : [here](https://www.nextflow.io/docs/latest/getstarted.html#) + +All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline +has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory anyway. + +> NB: You will need an account to use the HKI HPC clusters in order to run the pipeline. If in doubt contact the ICT Service Desk. +> NB: Nextflow will need to submit the jobs via SLURM to the HKI HPC clusters and as such the commands above will have to be executed on the login +> node. If in doubt contact ICT. diff --git a/docs/ku_sund_dangpu.md b/docs/ku_sund_dangpu.md new file mode 100644 index 0000000..72baf46 --- /dev/null +++ b/docs/ku_sund_dangpu.md @@ -0,0 +1,35 @@ +# nf-core/configs: ku_sund_dangpu configuration + +All nf-core pipelines have been successfully configured for use on the DANGPU at the +Novo Nordisk Foundation Center for Stem Cell Medicine (reNEW) and the Novo Nordisk Foundation Center for Protein Research (CPR) at the University of Copenhagen. + +To use, run the pipeline with `-profile ku_sund_dangpu`. This will download and launch the [`ku_sund_dangpu.config`](../conf/ku_sund_dangpu.config) which has been pre-configured with a setup suitable for the DANGPU. 
+ +## Modules + +Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on DANGPU. You can do this by issuing the commands below: + +```bash +## Load Nextflow and Singularity environment modules +module purge +module load java/11.0.15 nextflow/22.04.4 singularity/3.8.0 +# alternative modules for older nextflow version (v.21) that works with java 8: +# module load jdk/1.8.0_291 nextflow/21.04.1.5556 singularity/3.8.0 +export NXF_OPTS='-Xms1g -Xmx4g' +export NXF_HOME=/projects/dan1/people/${USER}/cache/nxf-home +export NXF_TEMP=/scratch/tmp +export NXF_SINGULARITY_CACHEDIR=/projects/dan1/people/${USER}/cache/singularity-images +``` + +Create the user-specific nextflow directories if they don't exist yet: + +``` +mkdir $NXF_SINGULARITY_CACHEDIR +mkdir $NXF_HOME +``` + +Finally, download and test the pipeline of choice using the `-profile ku_sund_dangpu`. Note that normally you would run resource-intensive commands with slurm, but in case of nf-core pipelines you do not have to do this: we have pre-configured slurm to be the resource manager within the `ku_sund_dangpu profile`. Just make sure that the pipeline is run within a tmux session. + +``` +nextflow run nf-core/rnaseq -profile test,ku_sund_dangpu +``` diff --git a/docs/mana.md b/docs/mana.md new file mode 100644 index 0000000..4d0be0d --- /dev/null +++ b/docs/mana.md @@ -0,0 +1,45 @@ +# nf-core/configs Mana (at University of Hawaii at Manoa) Configuration + +To use, run the pipeline with `-profile mana`. It will use the following parameters for Mana (UHM HPCC): + +- Load singularity and use it as default container technology +- Setup a container cache directory in your home (~/.singularity_images_cache) +- Select appropriate queues (currently: `shared,exclusive,kill-shared,kill-exclusive`) +- Set the maximum available resources (available in 09/02/2022): + - CPUs: 96 + - Memory: 400.GB + - Time: 72.h + +## Pre-requisites + +In order to run a nf-core pipeline on Mana, you will need to setup nextflow in your environment. +At the moment, nextflow is not available as a module (but might be in the future). + +### Install nextflow in a conda environment + +Before we start, we will need to work on an interactive node (currently, mana doesn't let you execute any program in the login node): + +```bash +# Request an interactive sandbox node for 30 min +srun --pty -t 30 -p sandbox /bin/bash +``` + +To setup nextflow on your account, follow these steps. + +```bash +# Load the latest anaconda3 module +module load lang/Anaconda3/2022.05 + +# Initialize environment +. $(conda info --base)/etc/profile.d/conda.sh + +# Install nextflow (here in base environment, but you can create a new one if you'd like) +conda install -c bioconda nextflow +``` + +If you want these settings to be persistent, you can add the first 2 commands in your .bash_profile file like this: + +```bash +echo "module load lang/Anaconda3/2022.05" >> ~/.bash_profile +echo "$(conda info --base)/etc/profile.d/conda.sh" >> ~/.bash_profile +``` diff --git a/docs/pipeline/atacseq/sbc_sharc.md b/docs/pipeline/atacseq/sbc_sharc.md new file mode 100644 index 0000000..1f33453 --- /dev/null +++ b/docs/pipeline/atacseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: ATAC-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/atacseq](https://nf-co.re/atacseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). 
+
+This will download and launch the atacseq specific [`sbc_sharc.config`](../../../conf/pipeline/atacseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file.
+
+Example: `nextflow run nf-core/atacseq -profile sbc_sharc`
diff --git a/docs/pipeline/chipseq/sbc_sharc.md b/docs/pipeline/chipseq/sbc_sharc.md
new file mode 100644
index 0000000..4280db9
--- /dev/null
+++ b/docs/pipeline/chipseq/sbc_sharc.md
@@ -0,0 +1,11 @@
+# nf-core/configs: ChIP-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC
+
+Specific configuration for [nf-co.re/chipseq](https://nf-co.re/chipseq) pipeline
+
+## Usage
+
+To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen).
+
+This will download and launch the chipseq specific [`sbc_sharc.config`](../../../conf/pipeline/chipseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file.
+
+Example: `nextflow run nf-core/chipseq -profile sbc_sharc`
diff --git a/docs/pipeline/demultiplex/aws_tower.md b/docs/pipeline/demultiplex/aws_tower.md
new file mode 100644
index 0000000..eb87186
--- /dev/null
+++ b/docs/pipeline/demultiplex/aws_tower.md
@@ -0,0 +1,19 @@
+# nf-core/configs: AWS Tower Demultiplex specific configuration
+
+Extra specific configuration for the demultiplex pipeline
+
+## Usage
+
+To use, run the pipeline with `-profile aws_tower`.
+
+This will download and launch the demultiplex specific [`aws_tower.config`](../../../conf/pipeline/demultiplex/aws_tower.config) which has been pre-configured with a setup suitable for AWS Batch through Tower.
+
+Example: `nextflow run nf-core/demultiplex -profile aws_tower`
+
+## demultiplex specific configurations for AWS Tower
+
+Specific configurations for AWS have been made for demultiplex.
+
+### General profiles
+
+- The general AWS Tower profile runs with default nf-core/demultiplex parameters, but with modifications to account for file transfer speed and optimized bases2fastq resources.
diff --git a/docs/pipeline/proteinfold/crg.md b/docs/pipeline/proteinfold/crg.md
new file mode 100644
index 0000000..9b11d2a
--- /dev/null
+++ b/docs/pipeline/proteinfold/crg.md
@@ -0,0 +1,23 @@
+# nf-core/configs: CRG proteinfold specific configuration
+
+Extra specific configuration for the proteinfold pipeline
+
+## Usage
+
+To use, run the pipeline with `-profile crg`.
+
+This will download and launch the proteinfold specific [`crg.config`](../../../conf/pipeline/proteinfold/crg.config) which has been pre-configured with a setup suitable for the CRG SGE cluster.
+
+Example: `nextflow run nf-core/proteinfold -profile crg`
+
+## proteinfold specific configurations for CRG
+
+Specific configurations for CRG have been made for proteinfold.
+ +### General profiles + + + +### Contextual profiles + + diff --git a/docs/pipeline/rnaseq/sbc_sharc.md b/docs/pipeline/rnaseq/sbc_sharc.md new file mode 100644 index 0000000..d62fe25 --- /dev/null +++ b/docs/pipeline/rnaseq/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: RNA-Seq Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/rnaseq](https://nf-co.re/rnaseq) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the rnaseq specific [`sbc_sharc.config`](../../../conf/pipeline/rnaseq/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/rnaseq -profile sbc_sharc` diff --git a/docs/pipeline/sarek/sbc_sharc.md b/docs/pipeline/sarek/sbc_sharc.md new file mode 100644 index 0000000..361be18 --- /dev/null +++ b/docs/pipeline/sarek/sbc_sharc.md @@ -0,0 +1,11 @@ +# nf-core/configs: Sarek Specific Configuration - Sheffield Bioinformatics Core Facility ShARC + +Specific configuration for [nf-co.re/sarek](https://nf-co.re/sarek) pipeline + +## Usage + +To use, run nextflow with the pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch the sarek specific [`sbc_sharc.config`](../../../conf/pipeline/sarek/sbc_sharc.config) which has been pre-configured with a setup suitable for the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html) and will automatically load the appropriate pipeline-specific configuration file. + +Example: `nextflow run nf-core/sarek -profile sbc_sharc` diff --git a/docs/sage.md b/docs/sage.md index 1e36fed..7b4a8fe 100644 --- a/docs/sage.md +++ b/docs/sage.md @@ -8,6 +8,7 @@ This global configuration includes the following tweaks: - Enable retries by default when exit codes relate to insufficient memory - Allow pending jobs to finish if the number of retries are exhausted - Increase the amount of time allowed for file transfers +- Improve reliability of file transfers with retries and reduced concurrency - Increase the default chunk size for multipart uploads to S3 - Slow down job submission rate to avoid overwhelming any APIs - Define the `check_max()` function, which is missing in Sarek v2 diff --git a/docs/sbc_sharc.md b/docs/sbc_sharc.md new file mode 100644 index 0000000..f82b348 --- /dev/null +++ b/docs/sbc_sharc.md @@ -0,0 +1,40 @@ +# nf-core/configs: Sheffield Bioinformatics Core Facility ShARC Configuration + +## Using the SBC_ShARC Institutional Configuration Profile + +To use [`sbc_sharc.config`](../conf/sbc_sharc.config), run nextflow with an nf-core pipeline using `-profile sbc_sharc` (note the single hyphen). + +This will download and launch [`sbc_sharc.config`](../conf/sbc_sharc.config) which has been pre-configured with a setup suitable for the ShARC cluster and will automatically load the appropriate pipeline-specific configuration file. 
+ +The following nf-core pipelines have been successfully configured for use on the the [University of Sheffield ShARC cluster](https://docs.hpc.shef.ac.uk/en/latest/index.html): + +- [nf-co.re/atacseq](https://nf-co.re/atacseq) +- [nf-co.re/chipseq](https://nf-co.re/chipseq) +- [nf-co.re/rnaseq](https://nf-co.re/rnaseq) +- [nf-co.re/sarek](https://nf-co.re/sarek) + +When using [`sbc_sharc.config`](../conf/sbc_sharc.config) with the pipelines listed above, the appropriate configuration file from the list below will be loaded automatically: + +- [atacseq sbc_sharc.config](../conf/pipeline/atacseq/sbc_sharc.config) +- [chipseq sbc_sharc.config](../conf/pipeline/chipseq/sbc_sharc.config) +- [rnaseq sbc_sharc.config](../conf/pipeline/rnaseq/sbc_sharc.config) +- [sarek sbc_sharc.config](../conf/pipeline/sarek/sbc_sharc.config) + +The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file might work with other nf-core pipelines as it stands but we cannot guarantee they will run without issue. We will be continuing to create, test and optimise configurations for new pipelines in the future. + +## A Note on Singularity Containers + +The [`sbc_sharc.config`](../conf/sbc_sharc.config) configuration file supports running nf-core pipelines with Singularity containers; Singularity images will be downloaded automatically before execution of the pipeline. + +When you run nextflow for the first time, Singularity will create a hidden directory `.singularity` in your `$HOME` directory `/home/$USER` which has very very limited (10GB) space available. It is therefore a good idea to create a directory somewhere else (e.g., `/data/$USER`) with more room and link the locations. To do this, run the following series of commands: + +```shell +# change directory to $HOME +cd $HOME + +# make the directory that will be linked to +mkdir /data/$USER/.singularity + +# link the new directory with the existing one +ln -s /data/$USER/.singularity .singularity +``` diff --git a/docs/tigem.md b/docs/tigem.md new file mode 100644 index 0000000..e562fe4 --- /dev/null +++ b/docs/tigem.md @@ -0,0 +1,7 @@ +# nf-core/configs: TIGEM configuration + +To use, run the pipeline with `-profile tigem`. This will download and launch the tigem.config which has been pre-configured with a setup suitable for the TIGEM personal biocluster. + +--- + +This configuration profile can be used on TIGEM clusters, with the pre-installed SLURM job scheduling system. An additional parameter is `google.zone` to allow downloading data from GCP for a specific time zone. It should not interfere with any local or other AWS configuration. diff --git a/docs/ucl_myriad.md b/docs/ucl_myriad.md new file mode 100644 index 0000000..1884a48 --- /dev/null +++ b/docs/ucl_myriad.md @@ -0,0 +1,51 @@ +# nf-core/configs: Myriad Configuration + +All nf-core pipelines have been successfully configured for use on UCL's myriad cluster [University College London](https://www.rc.ucl.ac.uk/docs/Clusters/Myriad/). + +To use, run the pipeline with `-profile ucl_myriad`. This will download and launch the [`ucl_myriad.config`](../conf/ucl_myriad.config) which has been pre-configured with a setup suitable for the myriad cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +## Using Nextflow on Myriad + +Before running the pipeline you will need to install and configure Nextflow and Singularity. 
+ +### Singularity + +This can be done with the following commands: + +```bash +## Load Singularity environment modules - these commands can be placed in your ~/.bashrc also +module add java/openjdk-11/11.0.1 +module add singularity-env/1.0.0 +``` + +Then set the correct configuration of the cache directories, where is replaced with you credentials which you can find by entering `whoami` into the terminal once you are logged into myriad. Once you have added your credentials save these lines into your .bashrc file in the base directory (e.g. /home//.bashrc): + +```bash +# Set all the Singularity cache dirs to Scratch +export SINGULARITY_CACHEDIR=/home//Scratch/.singularity/ +export SINGULARITY_TMPDIR=/home//Scratch/.singularity/tmp +export SINGULARITY_LOCALCACHEDIR=/home//Scratch/.singularity/localcache +export SINGULARITY_PULLFOLDER=/home//Scratch/.singularity/pull + +# Bind your Scratch directory so it is accessible from inside the container +export SINGULARITY_BINDPATH=/scratch/scratch/ +``` + +### Nextflow + +Download the latest release of nextflow. Warning: the self-update line should update to the latest version, but sometimes not, so please check which is the latest release (https://github.com/nextflow-io/nextflow/releases), you can then manually set this by entering (`NXF_VER=XX.XX.X`). + +```bash +## Download Nextflow-all +curl -s https://get.nextflow.io | bash +NXF_VER=22.10.0 +nextflow -self-update +chmod a+x nextflow +mv nextflow ~/bin/nextflow +``` + +Then make sure that your bin PATH is executable, by placing the following line in your .bashrc: + +```bash +export PATH=$PATH:/home//bin +``` diff --git a/docs/uppmax.md b/docs/uppmax.md index 891762a..dd664fb 100644 --- a/docs/uppmax.md +++ b/docs/uppmax.md @@ -8,7 +8,16 @@ We have a Slack channel dedicated to UPPMAX users on the nf-core Slack: [https:/ ## Using the UPPMAX config profile -Before running the pipeline you will need to either install `Nextflow` or load it using the environment module system (this can be done with e.g. `module load bioinfo-tools Nextflow/` where `VERSION` is e.g. `20.10`). +The recommended way to activate `Nextflow`, `nf-core`, and any pipeline +available in `nf-core` on UPPMAX is to use the [module system](https://www.uppmax.uu.se/resources/software/module-system/): + +```bash +# Log in to the desired cluster +ssh @{rackham,miarka,bianca}.uppmax.uu.se + +# Activate the modules, you can also choose to use a specific version with e.g. `Nextflow/21.10`. +module load bioinfo-tools Nextflow nf-core nf-core-pipelines +``` To use, run the pipeline with `-profile uppmax` (one hyphen). This will download and launch the [`uppmax.config`](../conf/uppmax.config) which has been pre-configured with a setup suitable for the UPPMAX servers. @@ -94,20 +103,12 @@ Before running a nf-core pipeline on `bianca` you will first have to download th In this guide, we use `rackham` to download and transfer files to the `wharf` area, but it can also be done on your own computer. If you use `rackham` to download the pipeline and the singularity containers, we recommend using an interactive session (cf [interactive guide](https://www.uppmax.uu.se/support/faq/running-jobs-faq/how-can-i-run-interactively-on-a-compute-node/)), which is what we do in the following guide. -### Download and install Nextflow - -You can use the `Nextflow` UPPMAX provided `module`, but if necessary, you can also download a more recent version. 
- -```bash -# Connect to bianca -$ ssh -A -@bianca.uppmax.uu.se - -# See the available versions for the module -module spider Nextflow +It is recommended to activate `Nextflow`, `nf-core` and your `nf-core` +pipeline through the module system (see **Using the UPPMAX config profile** +above). In case you need a specific version of any of these tools you can +follow the guide below. -# Load a specific version of the Nextflow module -module load bioinfo-tools Nextflow/` -``` +### Download and install Nextflow ```bash # Connect to rackham @@ -160,19 +161,6 @@ $ export NXF_SINGULARITY_CACHEDIR=/castor/project/proj_nobackup/singularity-imag ### Install nf-core tools -You can use the `nf-core` UPPMAX provided `module`, but if necessary, you can also download a more recent version. - -```bash -# Connect to rackham -$ ssh -X @rackham.uppmax.uu.se - -# See the available versions for the module -module spider nf-core - -# Load a specific version of the nf-core module -module load bioinfo-tools nf-core/` -``` - ```bash # Connect to rackham $ ssh -X @rackham.uppmax.uu.se @@ -254,7 +242,7 @@ And then `nf-core/` can be used with: $ nextflow run ~/ -profile uppmax --project --genome ... ``` -## Update a pipeline +### Update a pipeline To update, repeat the same steps as for installing and update the link. diff --git a/nfcore_custom.config b/nfcore_custom.config index fae764b..53d7682 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -30,6 +30,7 @@ profiles { cfc_dev { includeConfig "${params.custom_config_base}/conf/cfc_dev.config" } cheaha { includeConfig "${params.custom_config_base}/conf/cheaha.config" } computerome { includeConfig "${params.custom_config_base}/conf/computerome.config" } + crg { includeConfig "${params.custom_config_base}/conf/crg.config" } crick { includeConfig "${params.custom_config_base}/conf/crick.config" } crukmi { includeConfig "${params.custom_config_base}/conf/crukmi.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } @@ -44,12 +45,15 @@ profiles { google { includeConfig "${params.custom_config_base}/conf/google.config" } hasta { includeConfig "${params.custom_config_base}/conf/hasta.config" } hebbe { includeConfig "${params.custom_config_base}/conf/hebbe.config" } + hki { includeConfig "${params.custom_config_base}/conf/hki.config"} icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" } ifb_core { includeConfig "${params.custom_config_base}/conf/ifb_core.config" } imperial { includeConfig "${params.custom_config_base}/conf/imperial.config" } jax { includeConfig "${params.custom_config_base}/conf/jax.config" } + ku_sund_dangpu {includeConfig "${params.custom_config_base}/conf/ku_sund_dangpu.config"} lugh { includeConfig "${params.custom_config_base}/conf/lugh.config" } maestro { includeConfig "${params.custom_config_base}/conf/maestro.config" } + mana { includeConfig "${params.custom_config_base}/conf/mana.config" } marvin { includeConfig "${params.custom_config_base}/conf/marvin.config" } medair { includeConfig "${params.custom_config_base}/conf/medair.config" } mjolnir_globe { includeConfig "${params.custom_config_base}/conf/mjolnir_globe.config" } @@ -65,7 +69,10 @@ profiles { sage { includeConfig "${params.custom_config_base}/conf/sage.config" } sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" } sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} + sbc_sharc { includeConfig "${params.custom_config_base}/conf/sbc_sharc.config"} seg_globe { 
includeConfig "${params.custom_config_base}/conf/seg_globe.config"} + tigem { includeConfig "${params.custom_config_base}/conf/tigem.config"} + ucl_myriad { includeConfig "${params.custom_config_base}/conf/ucl_myriad.config"} uct_hpc { includeConfig "${params.custom_config_base}/conf/uct_hpc.config" } unibe_ibu { includeConfig "${params.custom_config_base}/conf/unibe_ibu.config" } uppmax { includeConfig "${params.custom_config_base}/conf/uppmax.config" } diff --git a/pipeline/atacseq.config b/pipeline/atacseq.config new file mode 100644 index 0000000..f205f62 --- /dev/null +++ b/pipeline/atacseq.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/atacseq custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/atacseq folder and imported + * under a profile name here. + */ + +profiles { + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/atacseq/sbc_sharc.config" } +} diff --git a/pipeline/chipseq.config b/pipeline/chipseq.config new file mode 100644 index 0000000..242aa92 --- /dev/null +++ b/pipeline/chipseq.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/chipseq custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/chipseq folder and imported + * under a profile name here. + */ + +profiles { + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/chipseq/sbc_sharc.config" } +} diff --git a/pipeline/demultiplex.config b/pipeline/demultiplex.config new file mode 100644 index 0000000..46c3e08 --- /dev/null +++ b/pipeline/demultiplex.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/demultiplex custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/demultiplex folder and imported + * under a profile name here. + */ + +profiles { + aws_tower { includeConfig "${params.custom_config_base}/conf/pipeline/demultiplex/aws_tower.config" } +} diff --git a/pipeline/proteinfold.config b/pipeline/proteinfold.config new file mode 100644 index 0000000..e4f59b4 --- /dev/null +++ b/pipeline/proteinfold.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/proteinfold custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/proteinfold folder and imported + * under a profile name here. 
+ */ + +profiles { + crg { includeConfig "${params.custom_config_base}/conf/pipeline/proteinfold/crg.config" } +} diff --git a/pipeline/rnaseq.config b/pipeline/rnaseq.config index 0486d86..b1d470f 100644 --- a/pipeline/rnaseq.config +++ b/pipeline/rnaseq.config @@ -11,5 +11,6 @@ profiles { eddie { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/eddie.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/mpcdf.config" } + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/sbc_sharc.config" } utd_sysbio { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/utd_sysbio.config" } } diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 12676b2..3c087aa 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,5 +15,6 @@ profiles { eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } munin { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/munin.config" } + sbc_sharc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/sbc_sharc.config" } uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/uppmax.config" } }
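
As a minimal usage sketch (commands taken from the docs added in this diff; your Nextflow version must be able to fetch nf-core/configs remotely), the new institutional and pipeline-level profiles are all selected with a single `-profile` flag, and the matching pipeline-specific config is included automatically via the `pipeline/*.config` files above:

```bash
# nf-core/sarek on the Sheffield ShARC cluster: conf/sbc_sharc.config plus
# conf/pipeline/sarek/sbc_sharc.config are loaded automatically
nextflow run nf-core/sarek -profile sbc_sharc

# nf-core/proteinfold on the CRG cluster: the CRG proteinfold config routes
# AlphaFold2/ColabFold jobs to the GPU queue when params.use_gpu is set
nextflow run nf-core/proteinfold -profile crg --use_gpu true
```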