diff --git a/.editorconfig b/.editorconfig index 835c83d..70c7a9a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,5 +8,5 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml}] +[*.{md,yml,yaml,cff}] indent_size = 2 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac31dfe..f5ba0d1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -66,6 +66,7 @@ jobs: - "jax" - "lugh" - "marvin" + - "medair" - "mjolnir_globe" - "maestro" - "mpcdf" @@ -77,6 +78,7 @@ jobs: - "phoenix" - "prince" - "rosalind" + - "sage" - "sahmri" - "sanger" - "seg_globe" diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..4533e2f --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." 
+version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +preferred-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: Nature Biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." + issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/README.md b/README.md index 1f37d29..94ce55c 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,6 @@ A repository for hosting Nextflow configuration files containing custom paramete - [Configuration and parameters](#configuration-and-parameters) - [Offline usage](#offline-usage) - [Adding a new config](#adding-a-new-config) - - [Checking user hostnames](#checking-user-hostnames) - [Testing](#testing) - [Documentation](#documentation) - [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) @@ -121,6 +120,7 @@ Currently documentation is available for the following systems: - [LUGH](docs/lugh.md) - [MAESTRO](docs/maestro.md) - [MARVIN](docs/marvin.md) +- [MEDAIR](docs/medair.md) - [MJOLNIR_GLOBE](docs/mjolnir_globe.md) - [MPCDF](docs/mpcdf.md) - [MUNIN](docs/munin.md) @@ -131,6 +131,7 @@ Currently documentation is available for the following systems: - [PHOENIX](docs/phoenix.md) - [PRINCE](docs/prince.md) - [ROSALIND](docs/rosalind.md) +- [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) - [SEG_GLOBE](docs/seg_globe.md) - [UCT_HPC](docs/uct_hpc.md) diff --git a/conf/hasta.config 
b/conf/hasta.config index 7e97e5a..c8c1f01 100644 --- a/conf/hasta.config +++ b/conf/hasta.config @@ -16,19 +16,29 @@ singularity { params { max_memory = 180.GB max_cpus = 36 - max_time = 336.h + max_time = 336.h } process { executor = 'slurm' - clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''}" } + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''}" } } profiles { - dev_prio { + stub_prio { params { priority = 'development' clusterOptions = "--qos=low" + max_memory = 6.GB + max_cpus = 1 + max_time = 1.h + } + } + + dev_prio { + params { + priority = 'development' + clusterOptions = "--qos=low" } } diff --git a/conf/medair.config b/conf/medair.config new file mode 100644 index 0000000..d147643 --- /dev/null +++ b/conf/medair.config @@ -0,0 +1,46 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Cluster profile for medair (local cluster of Clinical Genomics Gothenburg)' + config_profile_contact = 'Clinical Genomics, Gothenburg (cgg-rd@gu.se, cgg-it@gu.se)' + config_profile_url = 'https://www.scilifelab.se/units/clinical-genomics-goteborg/' +} + +//Nextflow parameters +singularity { + enabled = true + cacheDir = "/apps/bio/dependencies/nf-core/singularities" +} + +profiles { + + wgs { + process { + queue = 'wgs.q' + executor = 'sge' + penv = 'mpi' + process.clusterOptions = '-l excl=1' + params.max_cpus = 40 + params.max_time = 48.h + params.max_memory = 128.GB + } + } + + production { + process { + queue = 'production.q' + executor = 'sge' + penv = 'mpi' + process.clusterOptions = '-l excl=1' + params.max_cpus = 40 + params.max_time = 480.h + params.max_memory = 128.GB + } + } +} + +//Specific parameter for pipelines that can use Sentieon (e.g. 
nf-core/sarek, nf-core/raredisease) +process { + withLabel:'sentieon' { + container = "/apps/bio/singularities/sentieon-211204-peta.simg" + } +} diff --git a/conf/mpcdf.config b/conf/mpcdf.config index ee33913..93e2924 100644 --- a/conf/mpcdf.config +++ b/conf/mpcdf.config @@ -61,7 +61,7 @@ profiles { params { config_profile_description = 'MPCDF raven profile (unofficially) provided by nf-core/configs.' - memory = 2000000.MB + max_memory = 2000000.MB max_cpus = 72 max_time = 24.h } diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config index f35f1ed..8376719 100644 --- a/conf/pipeline/eager/eva.config +++ b/conf/pipeline/eager/eva.config @@ -6,11 +6,15 @@ params { config_profile_description = 'nf-core/eager EVA profile provided by nf-core/configs' } +env { + _JAVA_OPTIONS = "-XX:ParallelGCThreads=1" + OPENBLAS_NUM_THREADS = 1 + OMP_NUM_THREADS = 1 +} + // Specific nf-core/eager process configuration process { - beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1"' - maxRetries = 2 // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion @@ -69,17 +73,17 @@ process { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } } - + withName: fastqc_after_clipping { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" } - errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } } withName: adapter_removal { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 
'retry' : 'finish' } } - + withName: bwa { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga())}G,h=!(bionode01|bionode02|bionode03|bionode04|bionode05|bionode06)" } } @@ -188,26 +192,18 @@ process { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 2)}G" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } } - + withName:get_software_versions { cache = false clusterOptions = { "-S /bin/bash -V -l h=!(bionode06)" } - beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -Xmx512m"; export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1' - clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toMega())}M" } + clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toMega() * 8)}M" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } } - - withName:eigenstrat_snp_coverage { - beforeScript = 'export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1' - } - - withName:kraken_merge { - beforeScript = 'export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1' - } withName:multiqc { - beforeScript = 'export OPENBLAS_NUM_THREADS=1; export OMP_NUM_THREADS=1;' + clusterOptions = { "-S /bin/bash -V -j y -o output.log -l h_vmem=${task.memory.toGiga() * 2}G" } } + } profiles { @@ -226,8 +222,6 @@ profiles { process { - beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1"' - maxRetries = 2 // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion @@ -279,7 +273,7 @@ profiles { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 3)}G" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } } - + withName: fastqc_after_clipping { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 3)}G" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 
'retry' : 'finish' } @@ -404,8 +398,6 @@ profiles { process { - beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1"' - maxRetries = 2 // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion @@ -457,7 +449,7 @@ profiles { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } } - + withName: fastqc_after_clipping { clusterOptions = { "-S /bin/bash -V -l h_vmem=${(task.memory.toGiga() * 6)}G" } errorStrategy = { task.exitStatus in [1,143,137,104,134,139,140] ? 'retry' : 'finish' } diff --git a/conf/pipeline/raredisease/hasta.config b/conf/pipeline/raredisease/hasta.config index a076dbc..4c4d15e 100644 --- a/conf/pipeline/raredisease/hasta.config +++ b/conf/pipeline/raredisease/hasta.config @@ -1,5 +1,5 @@ process { - + withName:'PICARD_MARKDUPLICATES' { memory = { check_max( 90.GB * task.attempt, 'memory' ) } } @@ -7,7 +7,26 @@ process { cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 80.GB * task.attempt, 'memory' ) } } - withName:'QUALIMAP_BAMQC' { - ext.args = { "--java-mem-size=${task.memory.giga / 1.15 as long}G" } + withLabel:'sentieon' { + beforeScript = { "export PATH=\$PATH:\$SENTIEON_INSTALL_DIR/sentieon-genomics-202112.02/bin" } } -} \ No newline at end of file + withName: 'BCFTOOLS_VIEW' { + if (params.genome == 'GRCh37') { + ext.args = '--output-type z --apply-filters PASS --exclude "INFO/clinical_genomics_mipAF > 0.40 | INFO/swegenAF > 0.40 | INFO/clingen_ngiAF > 0.40 | INFO/gnomad_svAF > 0.40 "' + } else if (params.genome == 'GRCh38') { + ext.args = '--output-type z --apply-filters PASS --exclude "INFO/swegen_FRQ > 0.40"' + } + publishDir = [ + enabled: false, + ] + } + + // Java memory fixes + withName:'QUALIMAP_BAMQC' { + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? 
"--mem ${task.memory.mega * 1.15 as long}M" : ''}" } + } + withName:'PICARD_MARKDUPLICATES' { + clusterOptions = { "-A $params.priority ${params.clusterOptions ?: ''} ${task.memory ? "--mem ${task.memory.mega * 1.15 as long}M" : ''}" } + } + +} diff --git a/conf/pipeline/viralrecon/genomes.config b/conf/pipeline/viralrecon/genomes.config index baad834..06c8577 100644 --- a/conf/pipeline/viralrecon/genomes.config +++ b/conf/pipeline/viralrecon/genomes.config @@ -8,23 +8,27 @@ params { // Genome reference file paths genomes { + + // SARS-CoV-2 'NC_045512.2' { // This version of the reference has been kept here for backwards compatibility. // Please use 'MN908947.3' if possible because all primer sets are available / have been pre-prepared relative to that assembly fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz' gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz' - nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz' + nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz' nextclade_dataset_name = 'sars-cov-2' nextclade_dataset_reference = 'MN908947' - nextclade_dataset_tag = '2022-01-18T12:00:00Z' + nextclade_dataset_tag = '2022-06-14T12:00:00Z' } + + // SARS-CoV-2 'MN908947.3' { fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz' gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz' - nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz' + 
nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz' nextclade_dataset_name = 'sars-cov-2' nextclade_dataset_reference = 'MN908947' - nextclade_dataset_tag = '2022-01-18T12:00:00Z' + nextclade_dataset_tag = '2022-06-14T12:00:00Z' primer_sets { artic { '1' { @@ -66,5 +70,28 @@ params { } } } + + // Monkeypox + 'NC_063383.1' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCF_014621545.1_ASM1462154v1_genomic.220824.fna.gz' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/GCF_014621545.1_ASM1462154v1_genomic.220824.gff.gz' + nextclade_dataset = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/NC_063383.1/nextclade_hMPXV_NC_063383.1_2022-08-19T12_00_00Z.tar.gz' + nextclade_dataset_name = 'hMPXV' + nextclade_dataset_reference = 'NC_063383.1' + nextclade_dataset_tag = '2022-08-19T12:00:00Z' + } + + // Monkeypox + 'ON563414.3' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/ON563414.3/GCA_023516015.3_ASM2351601v1_genomic.220824.fna.gz' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/ON563414.3/GCA_023516015.3_ASM2351601v1_genomic.220824.gff.gz' + } + + // Monkeypox + 'MT903344.1' { + fasta = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MT903344.1/GCA_014621585.1_ASM1462158v1_genomic.220824.fna.gz' + gff = 'https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MT903344.1/GCA_014621585.1_ASM1462158v1_genomic.220824.gff.gz' + } + } } diff --git a/conf/sage.config b/conf/sage.config new file mode 100644 index 0000000..e5bfa8b --- /dev/null +++ b/conf/sage.config @@ -0,0 +1,100 @@ +params { + config_profile_description = 'The Sage Bionetworks profile' + config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)' + config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows' +} + +process { + + cpus = { 
check_max( 1 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' } + maxRetries = 5 + maxErrors = '-1' + + // Process-specific resource requirements + withLabel:process_low { + cpus = { check_max( 4 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 12 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 96.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 192.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + } + + // Preventing Sarek labels from using the actual maximums + withLabel:memory_max { + memory = { check_max( 128.GB * task.attempt, 'memory' ) } + } + withLabel:cpus_max { + cpus = { check_max( 24 * slow(task.attempt), 'cpus' ) } + } + +} + +aws { + region = "us-east-1" +} + +params { + igenomes_base = 's3://sage-igenomes/igenomes' + max_memory = 500.GB + max_cpus = 64 + max_time = 168.h // One week +} + +// Function to slow the increase of the resource multipler +// as attempts are made. The rationale is that some CPUs +// don't need to be increased as fast as memory. 
+def slow(attempt, factor = 2) { + return Math.ceil( attempt / factor) as int +} + + +// Function to ensure that resource requirements don't go +// beyond a maximum limit (copied here for Sarek v2) +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/conf/sanger.config b/conf/sanger.config index 1955ca9..df2dee7 100644 --- a/conf/sanger.config +++ b/conf/sanger.config @@ -1,35 +1,33 @@ +// Profile details params { - config_profile_description = 'The Wellcome Sanger Institute HPC cluster profile' - config_profile_contact = 'Anthony Underwood (@aunderwo)' - config_profile_url = 'https://www.sanger.ac.uk/group/informatics-support-group/' -} - -singularity { - enabled = true - cacheDir = "${baseDir}/singularity" - runOptions = '--bind /lustre --bind /nfs/pathnfs01 --bind /nfs/pathnfs02 --bind /nfs/pathnfs03 --bind /nfs/pathnfs04 --bind /nfs/pathnfs05 --bind /nfs/pathnfs06 --no-home' + config_profile_description = 'The Wellcome Sanger Institute HPC cluster (farm5) profile' + config_profile_contact = 'Priyanka Surana (@priyanka-surana)' + config_profile_url = 'https://www.sanger.ac.uk' } +// Queue and retry strategy process{ - executor = 'lsf' - queue = 'normal' - errorStrategy = { task.attempt <= 5 ? "retry" : "finish" } - process.maxRetries = 5 - withLabel:process_long { - queue = 'long' - } + executor = 'lsf' + queue = { task.time < 12.h ? 'normal' : task.time < 48.h ? 
'long' : 'basement' } + errorStrategy = 'retry' + maxRetries = 5 } +// Executor details executor{ - name = 'lsf' - perJobMemLimit = true - poolSize = 4 - submitRateLimit = '5 sec' - killBatchSize = 50 + name = 'lsf' + perJobMemLimit = true + poolSize = 4 + submitRateLimit = '5 sec' + killBatchSize = 50 } +// Max resources params { - max_memory = 128.GB - max_cpus = 64 - max_time = 48.h + max_memory = 683.GB + max_cpus = 256 + max_time = 720.h } + +// For singularity +singularity.runOptions = '--bind /lustre --bind /nfs' diff --git a/conf/vsc_ugent.config b/conf/vsc_ugent.config index f2558fc..0bc6ffd 100644 --- a/conf/vsc_ugent.config +++ b/conf/vsc_ugent.config @@ -7,9 +7,9 @@ workDir = "$scratch_dir/work" // Perform work directory cleanup when the run has succesfully completed // cleanup = true -// Reduce the job submit rate to about 10 per second, this way the server won't be bombarded with jobs +// Reduce the job submit rate to about 3 per second, this way the server won't be bombarded with jobs executor { - submitRateLimit = '10 sec' + submitRateLimit = '3 sec' } // Specify that singularity should be used and where the cache dir will be for the images @@ -19,6 +19,10 @@ singularity { cacheDir = "$scratch_dir/singularity" } +env { + SINGULARITY_CACHEDIR="$scratch_dir/.singularity" +} + // Define profiles for each cluster profiles { skitty { @@ -35,7 +39,6 @@ profiles { executor = 'slurm' queue = 'skitty' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -54,7 +57,6 @@ profiles { executor = 'slurm' queue = 'swalot' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -73,7 +75,6 @@ profiles { executor = 'slurm' queue = 'victini' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -92,7 +93,6 @@ profiles { executor = 'slurm' queue = 'kirlia' maxRetries = 2 - 
beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } @@ -111,7 +111,6 @@ profiles { executor = 'slurm' queue = 'doduo' maxRetries = 2 - beforeScript = "export SINGULARITY_CACHEDIR=$scratch_dir/.singularity" scratch = "$scratch_dir" } } diff --git a/docs/medair.md b/docs/medair.md new file mode 100644 index 0000000..706332f --- /dev/null +++ b/docs/medair.md @@ -0,0 +1,70 @@ +# nf-core/configs: Medair Configuration + +All nf-core pipelines have been successfully configured for use on the Medair cluster at Clinical Genomics Gothenburg. + +To use, run the pipeline with `-profile medair`. This will download and launch the [`medair.config`](../conf/medair.config) which has been pre-configured with a setup suitable for the Medair cluster. +It will enable Nextflow to manage the pipeline jobs via the `SGE` job scheduler. +Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +You will need an account to use the Medair cluster in order to download or run pipelines. If in doubt, contact cgg-it. + +## Download nf-core pipelines + +### Set-up: load Nextflow and nf-core tools + +First you need to load the relevant software: Nextflow and nf-core tools. You can do it as follows: + +```bash +## Load Nextflow +module load nextflow +## Load nf-core tools +module load miniconda +source activate nf-core +``` + +### Storage of Singularity images + +When downloading an nf-core pipeline for the first time (or a specific version of a pipeline), you can choose to store the Singularity image for future use. We chose to have a central location for these images on medair: `/apps/bio/dependencies/nf-core/singularities`. 
+ +For Nextflow to know where to store new images, run or add the following to your `.bashrc`: + +```bash +export NXF_SINGULARITY_CACHEDIR="/apps/bio/dependencies/nf-core/singularities" +``` + +> Comment: This was also added to cronuser. + +### Download a pipeline + +We have started to download pipelines in the following location: `/apps/bio/repos/nf-core/` + +Use the `nf-core download --singularity-cache-only` command to start a download. It will open an interactive menu. Choose `singularity` for the software container image, and `none` for the compression type. + +## Run nf-core pipelines + +Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands below will have to be executed on one of the login nodes. If in doubt contact cgg-it (cgg-it[at]gu.se). + +### Set-up: load Nextflow and Singularity + +Before running a pipeline you will need to load Nextflow and Singularity using the environment module system on Medair. You can do this by issuing the commands below: + +```bash +## Load Nextflow and Singularity environment modules +module purge +module load nextflow +module load singularity +``` + +### Choose a profile + +Depending on what you are running, you can choose between the `wgs` and `production` profiles. Jobs running with the `wgs` profile run on a queue with higher priority. Jobs running with the `production` profile can last longer (max time: 20 days, versus 2 days for the `wgs` profile). + +For example, the following job would run with the `wgs` profile: + +```bash +nextflow run nf-core/raredisease -profile medair,wgs +``` + +### Sentieon + +In some pipelines (sarek, raredisease) it is possible to use Sentieon for alignment and variant calling. If one uses the label `sentieon` for running a process, the config file contains the path to the Sentieon singularity image on Medair. 
diff --git a/docs/sage.md b/docs/sage.md new file mode 100644 index 0000000..133ccec --- /dev/null +++ b/docs/sage.md @@ -0,0 +1,27 @@ +# nf-core/configs: Sage Bionetworks Global Configuration + +To use this custom configuration, run the pipeline with `-profile sage`. This will download and load the [`sage.config`](../conf/sage.config), which contains a number of optimizations relevant to Sage employees running workflows on AWS (_e.g._ using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration. + +This global configuration includes the following tweaks: + +- Update the default value for `igenomes_base` to `s3://sage-igenomes` +- Increase the default time limits because we run pipelines on AWS +- Enable retries by default when exit codes relate to insufficient memory +- Allow pending jobs to finish if the number of retries is exhausted +- Slow the increase in the number of allocated CPU cores on retries +- Define the `check_max()` function, which is missing in Sarek v2 + +## Additional information about iGenomes + +The following iGenomes prefixes have been copied from `s3://ngi-igenomes/` (`eu-west-1`) to `s3://sage-igenomes` (`us-east-1`). See [this script](https://github.com/Sage-Bionetworks-Workflows/nextflow-infra/blob/main/bin/mirror-igenomes.sh) for more information. The `sage-igenomes` S3 bucket has been configured to be openly available, but files cannot be downloaded out of `us-east-1` to avoid egress charges. You can check the `conf/igenomes.config` file in each nf-core pipeline to figure out the mapping between genome IDs (_i.e._ for `--genome`) and iGenomes prefixes ([example](https://github.com/nf-core/rnaseq/blob/89bf536ce4faa98b4d50a8ec0a0343780bc62e0a/conf/igenomes.config#L14-L26)). 
+ +- **Human Genome Builds** + - `Homo_sapiens/Ensembl/GRCh37` + - `Homo_sapiens/GATK/GRCh37` + - `Homo_sapiens/UCSC/hg19` + - `Homo_sapiens/GATK/GRCh38` + - `Homo_sapiens/NCBI/GRCh38` + - `Homo_sapiens/UCSC/hg38` +- **Mouse Genome Builds** + - `Mus_musculus/Ensembl/GRCm38` + - `Mus_musculus/UCSC/mm10` diff --git a/docs/sanger.md b/docs/sanger.md index ee75755..ac6df4d 100644 --- a/docs/sanger.md +++ b/docs/sanger.md @@ -2,8 +2,6 @@ To use, run the pipeline with `-profile sanger`. This will download and launch the [`sanger.config`](../conf/sanger.config) which has been pre-configured with a setup suitable for the Wellcome Sanger Institute LSF cluster. -Using this profile, either a docker image containing all of the required software will be downloaded, and converted to a Singularity image or -a Singularity image downloaded directly before execution of the pipeline. ## Running the workflow on the Wellcome Sanger Institute cluster @@ -14,10 +12,12 @@ The latest version of Nextflow is not installed by default on the cluster. You w A recommended place to move the `nextflow` executable to is `~/bin` so that it's in the `PATH`. Nextflow manages each process as a separate job that is submitted to the cluster by using the `bsub` command. -Since the Nextflow pipeline will submit individual jobs for each process to the cluster and dependencies will be provided bu Singularity images you shoudl make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file + +If asking Nextflow to use Singularity to run the individual jobs, +you should make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file ```bash -[[ -f /software/pathogen/farm5 ]] && module load ISG/singularity +[[ -f /software/modules/ISG/singularity ]] && module load ISG/singularity ``` Nextflow shouldn't run directly on the submission node but on a compute node. 
@@ -26,16 +26,16 @@ To do so make a shell script with a similar structure to the following code and ```bash #!/bin/bash #BSUB -o /path/to/a/log/dir/%J.o -#BSUB -e /path/to/a/log/dir//%J.e +#BSUB -e /path/to/a/log/dir/%J.e #BSUB -M 8000 -#BSUB -q long -#BSUB -n 4 +#BSUB -q oversubscribed +#BSUB -n 2 export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128' export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128' export NXF_ANSI_LOG=false export NXF_OPTS="-Xms8G -Xmx8G -Dnxf.pool.maxThreads=2000" -export NXF_VER=21.04.0-edge +export NXF_VER=22.04.0-5697 nextflow run \ diff --git a/nfcore_custom.config b/nfcore_custom.config index 88e7227..6f0ac6c 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -49,6 +49,7 @@ profiles { lugh { includeConfig "${params.custom_config_base}/conf/lugh.config" } maestro { includeConfig "${params.custom_config_base}/conf/maestro.config" } marvin { includeConfig "${params.custom_config_base}/conf/marvin.config" } + medair { includeConfig "${params.custom_config_base}/conf/medair.config" } mjolnir_globe { includeConfig "${params.custom_config_base}/conf/mjolnir_globe.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/mpcdf.config" } munin { includeConfig "${params.custom_config_base}/conf/munin.config" } @@ -59,6 +60,7 @@ profiles { phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" } prince { includeConfig "${params.custom_config_base}/conf/prince.config" } rosalind { includeConfig "${params.custom_config_base}/conf/rosalind.config" } + sage { includeConfig "${params.custom_config_base}/conf/sage.config" } sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" } sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"} diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 57d7bdf..512541e 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -15,4 +15,4 @@ 
profiles { cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } -} \ No newline at end of file +}