diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3e5c930..65cabe5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: needs: test_all_profiles strategy: matrix: - profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'imperial', 'imperial_mb', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] + profile: ['abims', 'awsbatch', 'bi','bigpurple', 'binac', 'biohpc_gen', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'eddie', 'eva', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'icr_davros', 'ifb_core', 'imperial', 'imperial_mb', 'jax', 'kraken', 'mpcdf', 'munin', 'oist', 'pasteur', 'phoenix', 'prince', 'sanger', 'seg_globe', 'shh', 'uct_hpc', 'uppmax', 'utd_ganymede', 'uzh'] steps: - uses: actions/checkout@v1 - name: Install Nextflow diff --git a/README.md b/README.md index 45721e8..7637725 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,7 @@ Currently documentation is available for the following systems: * [BIGPURPLE](docs/bigpurple.md) * [BI](docs/bi.md) * [BINAC](docs/binac.md) +* [BIOHPC_GEN](docs/biohpc_gen.md) * [CBE](docs/cbe.md) * [CCGA_DX](docs/ccga_dx.md) * [CCGA_MED](docs/ccga_med.md) @@ -107,12 +108,14 @@ Currently documentation is available for the following systems: * [CZBIOHUB_AWS](docs/czbiohub.md) * [DENBI_QBIC](docs/denbi_qbic.md) * [EBC](docs/ebc.md) +* [EVA](docs/eva.md) * [GENOTOUL](docs/genotoul.md) * [GENOUEST](docs/genouest.md) * [GIS](docs/gis.md) * [GOOGLE](docs/google.md) * [HEBBE](docs/hebbe.md) * [ICR_DAVROS](docs/icr_davros.md) +* [JAX](docs/jax.md) * [KRAKEN](docs/kraken.md) * [MPCDF](docs/mpcdf.md) * [MUNIN](docs/munin.md) @@ -174,6 +177,7 @@ Currently documentation is available for the following pipelines within specific * [UPPMAX](docs/pipeline/ampliseq/uppmax.md) * eager * [SHH](docs/pipeline/eager/shh.md) + * [EVA](docs/pipeline/eager/eva.md) * rnafusion * [MUNIN](docs/pipeline/rnafusion/munin.md) * sarek diff --git a/conf/awsbatch.config b/conf/awsbatch.config index 2e5f83c..a8b61b8 100644 --- a/conf/awsbatch.config +++ b/conf/awsbatch.config @@ -7,10 +7,22 @@ params { awsqueue = false awsregion = 'eu-west-1' awscli = '/home/ec2-user/miniconda/bin/aws' - tracedir = './' +} + +timeline { + overwrite = true +} +report { + overwrite = true +} +trace { + overwrite = true +} +dag { + overwrite = true } process.executor = 'awsbatch' process.queue = params.awsqueue aws.region = params.awsregion -executor.awscli = params.awscli +aws.batch.cliPath = params.awscli diff --git a/conf/biohpc_gen.config b/conf/biohpc_gen.config new file mode 100755 index 0000000..e3f4069 --- /dev/null +++ b/conf/biohpc_gen.config @@ -0,0 +1,27 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'BioHPC Genomics (biohpc_gen) cluster profile provided by nf-core/configs' + config_profile_contact = 'Patrick Hüther (@phue)' + config_profile_url = 'https://collab.lmu.de/display/BioHPCGenomics/BioHPC+Genomics' +} + +env { + SLURM_CLUSTERS='biohpc_gen' +} + +process { + executor = 'slurm' + queue = { task.memory <= 1536.GB ? (task.time > 2.d || task.memory > 384.GB ? 
'biohpc_gen_production' : 'biohpc_gen_normal') : 'biohpc_gen_highmem' } + beforeScript = 'module use /dss/dsslegfs02/pn73se/pn73se-dss-0000/spack/modules/x86_avx2/linux*' + module = 'charliecloud/0.22:miniconda3' +} + +charliecloud { + enabled = true +} + +params { + params.max_time = 14.d + params.max_cpus = 80 + params.max_memory = 3.TB +} diff --git a/conf/cbe.config b/conf/cbe.config index 0a5763f..18f72dc 100755 --- a/conf/cbe.config +++ b/conf/cbe.config @@ -2,13 +2,13 @@ params { config_profile_description = 'CLIP BATCH ENVIRONMENT (CBE) cluster profile provided by nf-core/configs' config_profile_contact = 'Patrick Hüther (@phue)' - config_profile_url = 'http://www.gmi.oeaw.ac.at/' + config_profile_url = 'https://clip.science' } process { executor = 'slurm' queue = { task.memory <= 170.GB ? 'c' : 'm' } - clusterOptions = { task.time <= 8.h ? '--qos short': task.time <= 48.h ? '--qos medium' : '--qos long' } + clusterOptions = { task.time <= 1.h ? '--qos rapid' : task.time <= 8.h ? '--qos short': task.time <= 48.h ? '--qos medium' : '--qos long' } module = 'anaconda3/2019.10' } diff --git a/conf/crick.config b/conf/crick.config index 338bb76..f9c3197 100755 --- a/conf/crick.config +++ b/conf/crick.config @@ -17,7 +17,7 @@ process { params { max_memory = 224.GB max_cpus = 32 - max_time = 72.h + max_time = '72.h' igenomes_base = '/camp/svc/reference/Genomics/aws-igenomes' } diff --git a/conf/eddie.config b/conf/eddie.config new file mode 100644 index 0000000..70e0dcc --- /dev/null +++ b/conf/eddie.config @@ -0,0 +1,50 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'University of Edinburgh (eddie) cluster profile provided by nf-core/configs.' + config_profile_contact = 'Alison Meynert (@ameynert)' + config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing' +} + +executor { + name = "sge" + queueSize = "100" +} + +process { + clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus}" : null } + scratch = true + penv = { task.cpus > 1 ? "sharedmem" : null } + + // common SGE error statuses + errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} + maxErrors = '-1' + maxRetries = 3 + + beforeScript = + """ + . /etc/profile.d/modules.sh + module load 'roslin/singularity/3.5.3' + export SINGULARITY_TMPDIR="\$TMPDIR" + """ +} + +params { + saveReference = true + // iGenomes reference base + igenomes_base = '/exports/igmm/eddie/NextGenResources/igenomes' + max_memory = 384.GB + max_cpus = 32 + max_time = 240.h +} + +env { + MALLOC_ARENA_MAX=1 +} + +singularity { + envWhitelist = "SINGULARITY_TMPDIR" + runOptions = '-p' + enabled = true + autoMounts = true +} + diff --git a/conf/eva.config b/conf/eva.config new file mode 100644 index 0000000..ad71e12 --- /dev/null +++ b/conf/eva.config @@ -0,0 +1,51 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'Generic MPI-EVA cluster(s) profile provided by nf-core/configs.' 
+ config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_url = 'https://eva.mpg.de' +} + +// Preform work directory cleanup after a successful run +cleanup = true + +singularity { + enabled = true + autoMounts = true +} + +process { + executor = 'sge' + penv = 'smp' + queue = 'all.q' +} + +executor { + queueSize = 8 +} + +profiles { + archgen { + params { + igenomes_base = "/projects1/public_data/igenomes/" + config_profile_description = 'MPI-EVA archgen profile, provided by nf-core/configs.' + max_memory = 256.GB + max_cpus = 32 + max_time = 720.h + //Illumina iGenomes reference file path + } + + process { + queue = 'archgen.q' + } + + singularity { + cacheDir = "/mnt/archgen/users/singularity_scratch" + + } + + } + // Profile to deactivate automatic cleanup of work directory after a successful run. Overwrites cleanup option. + debug { + cleanup = false + } +} diff --git a/conf/ifb_core.config b/conf/ifb_core.config new file mode 100644 index 0000000..50a72f4 --- /dev/null +++ b/conf/ifb_core.config @@ -0,0 +1,24 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'The IFB core cluster profile' + config_profile_contact = 'https://community.france-bioinformatique.fr' + config_profile_url = 'https://www.france-bioinformatique.fr/' +} + +singularity { + // need one image per execution + enabled = true + runOptions = '-B /shared' +} + +process { + executor = 'slurm' +} + +params { + igenomes_ignore = true + // Max resources requested by a normal node on genotoul. + max_memory = 240.GB + max_cpus = 28 + max_time = 96.h +} diff --git a/conf/imperial.config b/conf/imperial.config index f40d92b..eeb98ae 100644 --- a/conf/imperial.config +++ b/conf/imperial.config @@ -27,7 +27,7 @@ executor { singularity { enabled = true autoMounts = true - runOptions = "-B /rds/,/rdsgpfs/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp" + runOptions = "-B /rds/,/rds/general/user/$USER/ephemeral/tmp/:/tmp,/rds/general/user/$USER/ephemeral/tmp/:/var/tmp" } process { diff --git a/conf/jax.config b/conf/jax.config new file mode 100644 index 0000000..7cf790e --- /dev/null +++ b/conf/jax.config @@ -0,0 +1,25 @@ +params { + config_profile_description = 'The Jackson Laboratory Sumner HPC profile provided by nf-core/configs.' + config_profile_contact = 'Asaf Peer (@peera)' + config_profile_url = 'https://jacksonlaboratory.sharepoint.com/sites/ResearchIT/SitePages/Welcome-to-Sumner.aspx' + singularity_cache_dir = '/fastscratch/singularity_cache_nfcore' + } + +executor.$slurm.queueSize = 250 +process { + executor = "slurm" + queue = "compute" + clusterOptions = {task.time < 72.h ? '-q batch' : '-q long'} + module = "slurm" + beforeScript = 'module load singularity' +} +singularity{ + enabled = true + autoMounts = true + cacheDir = params.singularity_cache_dir +} +params { + max_memory = 768.GB + max_cpus = 70 + max_time = 336.h + } diff --git a/conf/mpcdf.config b/conf/mpcdf.config index 481ebc4..c6c3c9c 100644 --- a/conf/mpcdf.config +++ b/conf/mpcdf.config @@ -1,24 +1,28 @@ params { config_profile_description = 'MPCDF HPC profiles (unoffically) provided by nf-core/configs.' config_profile_contact = 'James Fellows Yates (@jfy133)' - config_profile_url = 'https://www.mpcdf.mpg.de/services/computing' + config_profile_url = 'https://www.mpcdf.mpg.de/services/supercomputing' } profiles { cobra { - // Does not have singularity! 
Conda module must be used, but it is - // recommended to set NXF_CONDA_CACHEDIR var in ~/.bash{_profile,rc} - // To create common cache dir process { - beforeScript = 'module load anaconda/3/2020.02' + beforeScript = 'module load singularity' executor = 'slurm' } executor { queueSize = 8 } + + // Set $NXF_SINGULARITY_CACHEDIR in your ~/.bash_profile + // to stop downloading the same image for every run + singularity { + enabled = true + autoMounts = true + } params { config_profile_description = 'MPCDF cobra profile (unofficially) provided by nf-core/configs.' @@ -28,18 +32,22 @@ profiles { } } raven { - // Does not have singularity! Conda module must be used, but it is - // recommended to set NXF_CONDA_CACHEDIR var in ~/.bash{_profile,rc} - // to create common cache dir process { - beforeScript = 'module load anaconda/3/2020.02' + beforeScript = 'module load singularity' executor = 'slurm' } executor { queueSize = 8 } + + // Set $NXF_SINGULARITY_CACHEDIR in your ~/.bash_profile + // to stop downloading the same image for every run + singularity { + enabled = true + autoMounts = true + } params { config_profile_description = 'MPCDF raven profile (unofficially) provided by nf-core/configs.' @@ -47,5 +55,8 @@ profiles { max_cpus = 192 max_time = 24.h } + } + debug { + cleanup = false } } diff --git a/conf/pipeline/eager/eva.config b/conf/pipeline/eager/eva.config new file mode 100644 index 0000000..2e07d57 --- /dev/null +++ b/conf/pipeline/eager/eva.config @@ -0,0 +1,215 @@ +// Profile config names for nf-core/configs + +params { + // Specific nf-core/configs params + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_description = 'nf-core/eager EVA profile provided by nf-core/configs' +} + +// Specific nf-core/eager process configuration +process { + + beforeScript = 'export _JAVA_OPTIONS="-XX:ParallelGCThreads=1 -XX:+PrintCommandLineFlags"' + + maxRetries = 2 + + // Solution for clusterOptions comes from here: https://github.com/nextflow-io/nextflow/issues/332 + personal toMega conversion + clusterOptions = { "-S /bin/bash -j y -o output.log -l h_vmem=${task.memory.toGiga()}G,virtual_free=${task.memory.toGiga()}G" } + + withLabel:'sc_tiny'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_small'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_medium'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_small'{ + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_medium' { + cpus = { check_max( 4, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_large'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_huge'{ + cpus = { check_max( 32, 'cpus' ) } + memory = { check_max( 256.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } + } + + // Fixes for SGE and Java incompatibility due to Java using more memory than you tell it to use + + withName: makeSeqDict { + clusterOptions = { "-S /bin/bash -v JAVA_OPTS='-XX:ParallelGCThreads=1' -l h_vmem=${(task.memory.toGiga() + 3)}G,virtual_free=${(task.memory.toGiga() + 3)}G" } + } + + withName: fastqc { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: adapter_removal { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: dedup { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: markduplicates { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } + } + + withName: malt { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: maltextract { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: multivcfanalyzer { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: mtnucratio { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: vcf2genome { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: qualimap { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } + } + + withName: damageprofiler { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() + 6)}G,virtual_free=${(task.memory.toGiga() + 6)}G" } + } + + withName: circularmapper { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: circulargenerator { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + } + + withName: preseq { + clusterOptions = { "-S /bin/bash -l h_vmem=${(task.memory.toGiga() * 2)}G,virtual_free=${(task.memory.toGiga() * 2)}G" } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' } + } + +} + +profiles { + + big_data { + + params { + // Specific nf-core/configs params + config_profile_contact = 'James Fellows Yates (@jfy133)' + config_profile_description = 'nf-core/eager big-data EVA profile provided by nf-core/configs' + } + + executor { + queueSize = 6 + } + + process { + + maxRetries = 2 + + withName:hostremoval_input_fastq { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = 1440.h + } + + withLabel:'sc_tiny'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 2.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'sc_small'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } + } + + withLabel:'sc_medium'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_small'{ + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_medium' { + cpus = { check_max( 4, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_large'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + + withLabel:'mc_huge'{ + cpus = { check_max( 32, 'cpus' ) } + memory = { check_max( 512.GB * task.attempt, 'memory' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + } + } + } + + pathogen_loose { + params { + config_profile_description = 'Pathogen (loose) MPI-EVA profile, provided by nf-core/configs.' + bwaalnn = 0.01 + bwaalnl = 16 + } + } + pathogen_strict { + params { + config_profile_description = 'Pathogen (strict) MPI-EVA SDAG profile, provided by nf-core/configs.' + bwaalnn = 0.1 + bwaalnl = 32 + } + } + human { + params { + config_profile_description = 'Human MPI-EVA SDAG profile, provided by nf-core/configs.' + bwaalnn = 0.01 + bwaalnl = 16500 + } + } +} \ No newline at end of file diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 44d1c3c..5046df0 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -20,6 +20,12 @@ process { queue = { task.memory > 756.GB ? 
'supercruncher' : 'long' } } + withName: circulargenerator { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } diff --git a/conf/pipeline/rnaseq/eddie.config b/conf/pipeline/rnaseq/eddie.config new file mode 100644 index 0000000..d8e76d0 --- /dev/null +++ b/conf/pipeline/rnaseq/eddie.config @@ -0,0 +1,15 @@ +process { + +withName : "PICARD_MARKDUPLICATES" { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} +} + +withName : "QUALIMAP_RNASEQ" { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} +} + +withName : "FASTQC" { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} +} + +} diff --git a/conf/pipeline/sarek/eddie.config b/conf/pipeline/sarek/eddie.config new file mode 100644 index 0000000..9629337 --- /dev/null +++ b/conf/pipeline/sarek/eddie.config @@ -0,0 +1,51 @@ +process { + + withName:MapReads { + cpus = 16 + } + withName:BuildDict { + cpus = 1 + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:BamQC { + cpus = 8 + memory = 128.GB + clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus}"} + } + withName:MarkDuplicates { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:BaseRecalibrator { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:ApplyBQSR { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:GatherBQSRReports { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:HaplotypeCaller { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:GenotypeGVCFs { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:Mutect2 { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:MergeMutect2Stats { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:PileupSummariesForMutect2 { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:MergePileupSummaries { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:CalculateContamination { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + withName:FilterMutect2Calls { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } +} diff --git a/conf/sanger.config b/conf/sanger.config new file mode 100644 index 0000000..63952c8 --- /dev/null +++ b/conf/sanger.config @@ -0,0 +1,32 @@ +params { + config_profile_description = 'The Wellcome Sanger Institute HPC cluster profile' + config_profile_contact = 'Anthony Underwood (@aunderwo)' + config_profile_url = 'https://www.sanger.ac.uk/group/informatics-support-group/' +} + +singularity { + enabled = true + cacheDir = "${baseDir}/singularity" + runOptions = '--bind /lustre --bind /nfs/pathnfs01 --bind /nfs/pathnfs02 --bind /nfs/pathnfs03 --bind /nfs/pathnfs04 --bind /nfs/pathnfs05 --bind /nfs/pathnfs06 --no-home' +} + +process{ + executor = 'lsf' + queue = 'normal' + errorStrategy = { task.attempt <= 5 ? 
"retry" : "finish" } + process.maxRetries = 5 +} + +executor{ + name = 'lsf' + perJobMemLimit = true + poolSize = 4 + submitRateLimit = '5 sec' + killBatchSize = 50 +} + +params { + max_memory = 128.GB + max_cpus = 64 + max_time = 12.h +} diff --git a/conf/uppmax.config b/conf/uppmax.config index bc3d8c4..3b518d4 100644 --- a/conf/uppmax.config +++ b/conf/uppmax.config @@ -1,8 +1,11 @@ -//Profile config names for nf-core/configs +// Profile config names for nf-core/configs params { config_profile_description = 'Swedish UPPMAX cluster profile provided by nf-core/configs.' config_profile_contact = 'Phil Ewels (@ewels)' config_profile_url = 'https://www.uppmax.uu.se/' + project = null + clusterOptions = null + schema_ignore_params = "genomes,input_paths,cluster-options,clusterOptions,project" } singularity { diff --git a/conf/utd_ganymede.config b/conf/utd_ganymede.config index 409dc0a..01bb10b 100644 --- a/conf/utd_ganymede.config +++ b/conf/utd_ganymede.config @@ -5,6 +5,11 @@ params { config_profile_url = 'http://docs.oithpc.utdallas.edu/' } +env { + TMPDIR = '/home/$USER/scratch/tmp' + SINGULARITY_CACHEDIR = '/home/$USER/scratch/tmp' +} + singularity { enabled = true envWhitelist='SINGULARITY_BINDPATH' @@ -15,6 +20,28 @@ process { beforeScript = 'module load singularity/3.2.1' executor = 'slurm' queue = { task.memory >= 32.GB && task.cpu <= 12 ? 'Kim': task.memory <= 24.GB && task.cpu <= 8 ? 'smallmem' : 'genomics' } + + withName:TRIMGALORE { + memory = 31.GB + } + + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } + } + + withLabel:process_medium { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 31.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 120.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } } params { diff --git a/conf/wcm.config b/conf/wcm.config new file mode 100644 index 0000000..38cd3d1 --- /dev/null +++ b/conf/wcm.config @@ -0,0 +1,28 @@ +singularityDir = "/athena/elementolab/scratch/reference/.singularity/singularity_images_nextflow" + +params { + config_profile_description = 'Weill Cornell Medicine, Scientific Computing Unit Slurm cluster profile provided by nf-core/configs' + config_profile_contact = 'Ashley Stephen Doane, PhD (@DoaneAS)' + igenomes_base = '/athena/elementolab/scratch/reference/igenomes' +} + +singularity { + enabled = true + envWhitelist='SINGULARITY_BINDPATH' + cacheDir = "/athena/elementolab/scratch/reference/.singularity/singularity_images_nextflow" + autoMounts = true +} + +process { + executor = 'slurm' + queue = 'panda_physbio' + scratch = true + scratch = '/scratchLocal/`whoami`_${SLURM_JOBID}' +} + +params { + max_memory = 32.GB + max_cpus = 8 + max_time = 24.h +} + diff --git a/docs/awsbatch.md b/docs/awsbatch.md index c00acef..daf425b 100644 --- a/docs/awsbatch.md +++ b/docs/awsbatch.md @@ -1,4 +1,6 @@ # nf-core/configs: awsbatch Configuration To be used with `awsbatch`. -Custom queue, region and CLI path can be supplied with `params.awsqueue`, `params.awsregion`, `params.awscli`, respectively. +Custom queue and region can be supplied with `params.awsqueue`, `params.awsregion`, `params.awscli`, respectively. + +Allow `overwrite` of `trace`, `timeline`, `report` and `dag` to allow resuming pipelines. 
diff --git a/docs/biohpc_gen.md b/docs/biohpc_gen.md new file mode 100644 index 0000000..1078835 --- /dev/null +++ b/docs/biohpc_gen.md @@ -0,0 +1,17 @@ +# nf-core/configs: BioHPC Genomics (BIOHPC_GEN) Configuration + +All nf-core pipelines have been successfully configured for use on the BioHPC Genomics (biohpc_gen) cluster that is housed at the Leibniz Rechenzentrum (LRZ) for research groups at the Faculty of Biology of the Ludwig-Maximilians-University (LMU) in Munich. + +To use, run the pipeline with `-profile biohpc_gen`. This will download and launch the [`biohpc_gen.config`](../conf/biohpc_gen.config) which has been pre-configured with a setup suitable for the biohpc_gen cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Charliecloud container before execution of the pipeline. + +Before running the pipeline you will need to load Nextflow and Charliecloud using the environment module system on biohpc_gen. You can do this by issuing the commands below: + +```bash +## Load Nextflow and Charliecloud environment modules +module purge +module load nextflow charliecloud/0.22 +``` + +>NB: Charliecloud support requires Nextflow version `21.03.0-edge` or later. +>NB: You will need an account to use the LRZ Linux cluster as well as group access to the biohpc_gen cluster in order to run nf-core pipelines. +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. diff --git a/docs/eddie.md b/docs/eddie.md new file mode 100644 index 0000000..e5dbe5c --- /dev/null +++ b/docs/eddie.md @@ -0,0 +1,104 @@ +# nf-core/configs: Eddie Configuration + +nf-core pipelines sarek, rnaseq, and atacseq have all been tested on the University of Edinburgh Eddie HPC. + +## Getting help + +There is a Slack channel dedicated to eddie users on the MRC IGMM Slack: [https://igmm.slack.com/channels/eddie3](https://igmm.slack.com/channels/eddie3) + +## Using the Eddie config profile + +To use, run the pipeline with `-profile eddie` (one hyphen). +This will download and launch the [`eddie.config`](../conf/eddie.config) which has been pre-configured with a setup suitable for the [University of Edinburgh Eddie HPC](https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing). + +The configuration file supports running nf-core pipelines with Docker containers running under Singularity by default. Conda is not currently supported. + +```bash +nextflow run nf-core/PIPELINE -profile eddie # ...rest of pipeline flags +``` + +Before running the pipeline you will need to install Nextflow or load it from the module system. Generally the most recent version will be the one you want. If you want to run a Nextflow pipeline that is based on [DSL2](https://www.nextflow.io/docs/latest/dsl2.html), you will need a version that ends with '-edge'. + +To list versions: + +```bash +module avail igmm/apps/nextflow +``` + +To load the most recent version: + +```bash +module load igmm/apps/nextflow +``` + +This config enables Nextflow to manage the pipeline jobs via the SGE job scheduler and using Singularity for software management. + +## Singularity set-up + +Load Singularity from the module system and, if you have access to `/exports/igmm/eddie/NextGenResources`, set the Singularity cache directory to the NextGenResources path below. 
If some containers for your pipeline run are not present, please contact the [IGMM Data Manager](data.manager@igmm.ed.ac.uk) to have them added. You can add these lines to the file `$HOME/.bashrc`, or you can run these commands before you run an nf-core pipeline. + +If you do not have access to `/exports/igmm/eddie/NextGenResources`, set the Singularity cache directory to somewhere sensible that is not in your `$HOME` area (which has limited space). It will take time to download all the Singularity containers, but you can use this again. + +```bash +module load singularity +export NXF_SINGULARITY_CACHEDIR="/exports/igmm/eddie/NextGenResources/nextflow/singularity" +``` + +Singularity will create a directory `.singularity` in your `$HOME` directory on eddie. Space on `$HOME` is very limited, so it is a good idea to create a directory somewhere else with more room and link the locations. + +```bash +cd $HOME +mkdir /exports/eddie/path/to/my/area/.singularity +ln -s /exports/eddie/path/to/my/area/.singularity .singularity +``` + +## Running Nextflow + +### On a login node + +You can use a qlogin to run Nextflow, if you request more than the default 2GB of memory. Unfortunately you can't submit the initial Nextflow run process as a job as you can't qsub within a qsub. + +```bash +qlogin -l h_vmem=8G +``` + +If your eddie terminal disconnects your Nextflow job will stop. You can run Nextflow as a bash script on the command line using `nohup` to prevent this. + +```bash +nohup ./nextflow_run.sh & +``` + +### On a wild west node - IGMM only + +Wild west nodes on eddie can be accessed via ssh (node2c15, node2c16, node3g22). To run Nextflow on one of these nodes, do it within a [screen session](https://linuxize.com/post/how-to-use-linux-screen/). + +Start a new screen session. + +```bash +screen -S +``` + +List existing screen sessions + +```bash +screen -ls +``` + +Reconnect to an existing screen session + +```bash +screen -r +``` + +## Using iGenomes references + +A local copy of the iGenomes resource has been made available on the Eddie HPC for those with access to `/exports/igmm/eddie/NextGenResources` so you should be able to run the pipeline against any reference available in the `igenomes.config`. +You can do this by simply using the `--genome ` parameter. + +## Adjusting maximum resources + +This config is set for IGMM standard nodes which have 32 cores and 384GB memory. If you are a non-IGMM user, please see the [ECDF specification](https://www.wiki.ed.ac.uk/display/ResearchServices/Memory+Specification) and adjust the `--clusterOptions` flag appropriately, e.g. + +```bash +--clusterOptions "-C mem256GB" --max_memory "256GB" +``` diff --git a/docs/eva.md b/docs/eva.md new file mode 100644 index 0000000..24a964c --- /dev/null +++ b/docs/eva.md @@ -0,0 +1,28 @@ +# nf-core/configs: EVA Configuration + +All nf-core pipelines have been successfully configured for use on the Department of Genetics and Archaeogenetic's clusters at the [Max Planck Institute for Evolutionary Anthropology (MPI-EVA)](http://eva.mpg.de). + +To use, run the pipeline with `-profile eva`. You can further with optimise submissions by specifying which cluster queue you are using e,g, `-profile eva,archgen`. This will download and launch the [`eva.config`](../conf/eva.config) which has been pre-configured with a setup suitable for the `all.q` queue. The number of parallel jobs that run is currently limited to 8. 
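+
+For example (nf-core/eager is used here only as an illustration; any nf-core pipeline can be substituted):
+
+```bash
+# Run with the general MPI-EVA profile
+nextflow run nf-core/eager -profile eva
+
+# Or additionally select the archgen queue profile described below
+nextflow run nf-core/eager -profile eva,archgen
+```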
+
+Using this profile, a docker image containing all of the required software will be downloaded, and converted to a `singularity` image before execution of the pipeline. The image will currently be centrally stored here:
+
+## Additional Profiles
+
+We currently also offer profiles for the different departments' specific nodes.
+
+### archgen
+
+If you specify `-profile eva,archgen` you will be able to use the nodes available on the `archgen.q` queue.
+
+Note the following characteristics of this profile:
+
+- By default, job resources are assigned a maximum of 32 CPUs, 256 GB memory and 720.h wall time.
+- Using this profile will currently store singularity images in a cache under `/mnt/archgen/users/singularity_scratch/cache/`. All archgen users currently have read/write access to this directory; however, this will likely change in the future to a read-only directory managed by the IT team.
+- Intermediate files will be _automatically_ cleaned up on successful run completion (see `debug` below if you don't want this to happen).
+
+>NB: You will need an account and VPN access to use the cluster at MPI-EVA in order to run the pipeline. If in doubt contact the IT team.
+>NB: Nextflow will need to submit the jobs via SGE to the clusters and as such the commands above will have to be executed on one of the head nodes. If in doubt contact IT.
+
+### debug
+
+This simple profile just turns off the automatic cleanup of intermediate files. This can be useful for debugging. Specify it with e.g. `-profile eva,debug`.
diff --git a/docs/ifb_core.md b/docs/ifb_core.md
new file mode 100644
index 0000000..90c0eb1
--- /dev/null
+++ b/docs/ifb_core.md
@@ -0,0 +1,40 @@
+# nf-core/configs: IFB core Configuration
+
+All nf-core pipelines have been successfully configured for use on the cluster of the IFB (Institut Francais de Bioinformatique).
+
+To use, run the pipeline with `-profile ifb_core`. This will download and launch the [`ifb_core.config`](../conf/ifb_core.config) which has been pre-configured with a setup suitable for the IFB core cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
+
+## How to use on IFB core
+
+Before running the pipeline you will need to load Nextflow using the environment module system on IFB core. You can do this by issuing the commands below:
+
+```bash
+# Login to a compute node
+srun --pty bash
+
+## Load Nextflow and Singularity environment modules
+module purge
+module load nextflow/20.04.1
+
+# Run a downloaded/git-cloned nextflow workflow
+nextflow run \
+nf-core/workflow \
+-resume \
+-profile ifb_core \
+--email my-email@example.org \
+-c my-specific.config
+...
+
+
+# Or run the pipeline directly from the nf-core repository
+nextflow run nf-core/rnaseq ...
+
+```
+
+## Databanks
+
+A local copy of several genomes is available in the `/shared/bank` directory. See
+our [databank page](https://ifb-elixirfr.gitlab.io/cluster/doc/banks/)
+to search for your favorite genome.
+
+>NB: You will need an account to use the HPC cluster on IFB core in order to run the pipeline. If in doubt contact IT or go to the [account page](https://my.cluster.france-bioinformatique.fr/manager2/login).
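+
+If you prefer to submit the Nextflow head job itself as a batch job rather than keeping an interactive session open, a minimal wrapper along the following lines can be adapted (the resource requests, module version and pipeline are examples only):
+
+```bash
+#!/bin/bash
+#SBATCH --job-name=nfcore-head
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=4G
+
+# Load Nextflow and launch the workflow with the IFB core profile
+module purge
+module load nextflow/20.04.1
+
+nextflow run nf-core/rnaseq \
+    -profile ifb_core \
+    -resume
+```
+
+Submit the script with `sbatch my_script.sh`.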
diff --git a/docs/jax.md b/docs/jax.md new file mode 100644 index 0000000..d38cfbe --- /dev/null +++ b/docs/jax.md @@ -0,0 +1,8 @@ +# nf-core/configs: JAX Configuration + +All nf-core pipelines have been successfully configured for use on the JAX Sumner cluster at The Jackson Laboratory. + +To use, run the pipeline with `-profile jax`. This will download and launch the [`jax.config`](../conf/jax.config) which has been pre-configured with a setup suitable for JAX Sumner cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline and slurm will be used as well. + +>NB: You will need an account to use the HPC cluster JAX in order to run the pipeline. If in doubt contact IT. +>NB: Nextflow should not be executed on the login nodes. If in doubt contact IT. diff --git a/docs/mpcdf.md b/docs/mpcdf.md index af16d76..73ed52f 100644 --- a/docs/mpcdf.md +++ b/docs/mpcdf.md @@ -4,18 +4,16 @@ All nf-core pipelines have been successfully configured for use on the HPCs at [ > :warning: these profiles are not officially supported by the MPCDF. -To run Nextflow, the `jdk` module must be loaded. To use the nf-core profile(s), run the pipeline with `-profile ,mpcdf`. +To run Nextflow, the `jdk` module must be loaded. To use the nf-core profile(s), run the pipeline with `-profile mpcdf,`. -Currently the following clusters are supported: cobra, raven +Currently profiles for the following clusters are supported: `cobra`, `raven` + +All profiles use `singularity` as the corresponding containerEngine. To prevent repeatedly downloading the same singularity image for every pipeline run, for all profiles we recommend specifying a cache location in your `~/.bash_profile` with the `$NXF_SINGULARITY_CACHEDIR` bash variable. >NB: Nextflow will need to submit the jobs via SLURM to the clusters and as such the commands above will have to be executed on one of the head nodes. Check the [MPCDF documentation](https://www.mpcdf.mpg.de/services/computing). ## cobra -Cobra does not currently support singularity, therefore the anaconda/module is loaded for each process. - -Due to this, we also recommend setting the `$NXF_CONDA_CACHEDIR` to a location of your choice to store all environments (so to prevent nextflow building the environment on every run). - To use: `-profile cobra,mpcdf` Sets the following parameters: @@ -31,10 +29,6 @@ Sets the following parameters: ## raven -Raven does not currently support singularity, therefore `module load anaconda/3/2020.02` is loaded for each process. - -Due to this, we also recommend setting the `$NXF_CONDA_CACHEDIR` to a location of your choice to store all environments (so to prevent nextflow building the environment on every run). - To use: `-profile raven,mpcdf` Sets the following parameters: diff --git a/docs/pipeline/eager/eva.md b/docs/pipeline/eager/eva.md new file mode 100644 index 0000000..a8dc563 --- /dev/null +++ b/docs/pipeline/eager/eva.md @@ -0,0 +1,34 @@ +# nf-core/configs: eva eager specific configuration + +Extra specific configuration for eager pipeline + +## Usage + +To use, run the pipeline with `-profile eva`. + +This will download and launch the eager specific [`eva.config`](../../../conf/pipeline/eager/eva.config) which has been pre-configured with a setup suitable for the MPI-EVA cluster. + +Example: `nextflow run nf-core/eager -profile eva` + +## eager specific configurations for eva + +Specific configurations for eva has been made for eager. 
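+
+The contextual profiles described below are selected together with the cluster profile on the command line, for example (the input option is a placeholder for your usual nf-core/eager arguments):
+
+```bash
+# Run nf-core/eager on the MPI-EVA cluster with the larger big_data
+# resource set and the loose pathogen mapping parameters
+nextflow run nf-core/eager -profile eva,big_data,pathogen_loose --input 'samples.tsv'
+```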
+
+### General profiles
+
+- The general MPI-EVA profile runs with default nf-core/eager parameters, but with modifications to account for issues SGE has with Java tools.
+
+#### big_data
+
+- This defines larger base computing resources for working with very deeply sequenced or high-endogenous samples.
+
+### Contextual profiles
+
+#### Human Pop-Gen
+
+- `human`: optimised for mapping of human aDNA reads (i.e. bwa aln settings of `-l 16500, -n 0.01`)
+
+#### Pathogen
+
+- `pathogen_loose`: optimised for mapping of ancient pathogen DNA reads (i.e. bwa aln settings of `-l 16, -n 0.01`)
+- `pathogen_strict`: optimised for mapping of ancient pathogen DNA reads (i.e. bwa aln settings of `-l 32, -n 0.1`)
diff --git a/docs/pipeline/eager/mpcdf.config b/docs/pipeline/eager/mpcdf.config
new file mode 100644
index 0000000..d170f4c
--- /dev/null
+++ b/docs/pipeline/eager/mpcdf.config
@@ -0,0 +1,11 @@
+# nf-core/configs: mpcdf eager specific configuration
+
+Extra configuration for the eager pipeline on the `cobra` cluster of the MPCDF.
+
+## Usage
+
+To use, run the pipeline with `-profile mpcdf,cobra`.
+
+This will download and launch the eager-specific [`mpcdf.config`](../../../conf/pipeline/eager/mpcdf.config) which has been pre-configured with a setup suitable for the mpcdf cluster.
+
+Currently this only applies to the `cobra` cluster, where maximum resources are adjusted accordingly.
diff --git a/docs/sanger.md b/docs/sanger.md
new file mode 100644
index 0000000..ee75755
--- /dev/null
+++ b/docs/sanger.md
@@ -0,0 +1,54 @@
+# nf-core/configs: Wellcome Sanger Institute Configuration
+
+To use, run the pipeline with `-profile sanger`. This will download and launch the [`sanger.config`](../conf/sanger.config) which has been
+pre-configured with a setup suitable for the Wellcome Sanger Institute LSF cluster.
+Using this profile, either a docker image containing all of the required software will be downloaded and converted to a Singularity image, or
+a Singularity image will be downloaded directly before execution of the pipeline.
+
+## Running the workflow on the Wellcome Sanger Institute cluster
+
+The latest version of Nextflow is not installed by default on the cluster. You will need to install it into a directory you have write access to:
+
+- Install Nextflow: [here](https://www.nextflow.io/docs/latest/getstarted.html#)
+
+A recommended place to move the `nextflow` executable to is `~/bin` so that it's in the `PATH`.
+
+Nextflow manages each process as a separate job that is submitted to the cluster by using the `bsub` command.
+Since the Nextflow pipeline will submit individual jobs for each process to the cluster, and dependencies will be provided by Singularity images, you should make sure that your account has access to the Singularity binary by adding these lines to your `.bashrc` file:
+
+```bash
+[[ -f /software/pathogen/farm5 ]] && module load ISG/singularity
+```
+
+Nextflow shouldn't run directly on the submission node but on a compute node.
+To do so make a shell script with a similar structure to the following code and submit with `bsub < $PWD/my_script.sh` + +```bash +#!/bin/bash +#BSUB -o /path/to/a/log/dir/%J.o +#BSUB -e /path/to/a/log/dir//%J.e +#BSUB -M 8000 +#BSUB -q long +#BSUB -n 4 + +export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128' +export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128' +export NXF_ANSI_LOG=false +export NXF_OPTS="-Xms8G -Xmx8G -Dnxf.pool.maxThreads=2000" +export NXF_VER=21.04.0-edge + + +nextflow run \ +/path/to/nf-core/pipeline/main.nf \ +-w /path/to/some/dir/work \ +-profile sanger \ +-c my_specific.config \ +-qs 1000 \ +-resume + +## clean up on exit 0 - delete this if you want to keep the work dir +status=$? +if [[ $status -eq 0 ]]; then + rm -r /path/to/some/dir/work +fi +``` diff --git a/docs/uppmax.md b/docs/uppmax.md index d9ac5ce..3024cb0 100644 --- a/docs/uppmax.md +++ b/docs/uppmax.md @@ -8,42 +8,47 @@ We have a Slack channel dedicated to UPPMAX users on the nf-core Slack: [https:/ ## Using the UPPMAX config profile +Before running the pipeline you will need to either install `Nextflow` or load it using the environment module system (this can be done with e.g. `module load bioinfo-tools Nextflow/` where `VERSION` is e.g. `20.10`). + To use, run the pipeline with `-profile uppmax` (one hyphen). This will download and launch the [`uppmax.config`](../conf/uppmax.config) which has been pre-configured with a setup suitable for the UPPMAX servers. -Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. +It will enable `Nextflow` to manage the pipeline jobs via the `Slurm` job scheduler. +Using this profile, `Docker` image(s) containing required software(s) will be downloaded, and converted to `Singularity` image(s) if needed before execution of the pipeline. + +Recent version of `Nextflow` also support the environment variable `NXF_SINGULARITY_CACHEDIR` which can be used to supply images. +Images for some `nf-core` pipelines are available under `/sw/data/ToolBox/nf-core/` and those can be used by `NXF_SINGULARITY_CACHEDIR=/sw/data/ToolBox/nf-core/; export NXF_SINGULARITY_CACHEDIR`. In addition to this config profile, you will also need to specify an UPPMAX project id. -You can do this with the `--project` flag (two hyphens) when launching nextflow. For example: +You can do this with the `--project` flag (two hyphens) when launching `Nextflow`. +For example: ```bash -nextflow run nf-core/PIPELINE -profile uppmax --project snic2018-1-234 # ..rest of pipeline flags +# Launch a nf-core pipeline with the uppmax profile for the project id snic2018-1-234 +$ nextflow run nf-core/ -profile uppmax --project snic2018-1-234 [...] ``` > NB: If you're not sure what your UPPMAX project ID is, try running `groups` or checking SUPR. -Before running the pipeline you will need to either install Nextflow or load it using the environment module system. +Just run `Nextflow` on a login node and it will handle everything else. -This config enables Nextflow to manage the pipeline jobs via the Slurm job scheduler and using Singularity for software management. +Remember to use `-bg` to launch `Nextflow` in the background, so that the pipeline doesn't exit if you leave your terminal session. +Alternatively, you can also launch `Nextflow` in a `screen` or a `tmux` session. -Just run Nextflow on a login node and it will handle everything else. 
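+
+For example (pipeline name and project id are placeholders):
+
+```bash
+# Launch in the background so the run survives closing the terminal
+nextflow run nf-core/rnaseq -profile uppmax --project snic2018-1-234 -bg > nextflow.log
+
+# Or keep the run attached inside a named screen session instead
+screen -S nfcore
+nextflow run nf-core/rnaseq -profile uppmax --project snic2018-1-234
+```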
+## Using AWS iGenomes references -Remember to use `-bg` to launch Nextflow in the background, so that the pipeline doesn't exit if you leave your terminal session. - -## Using iGenomes references - -A local copy of the iGenomes resource has been made available on all UPPMAX clusters so you should be able to run the pipeline against any reference available in the `igenomes.config`. +A local copy of the `AWS iGenomes` resource has been made available on all UPPMAX clusters so you should be able to run the pipeline against any reference available in the `conf/igenomes.config`. You can do this by simply using the `--genome ` parameter. ## Getting more memory -If your nf-core pipeline run is running out of memory, you can run on a fat node with more memory using the following nextflow flags: +If your `nf-core` pipeline run is running out of memory, you can run on a fat node with more memory using the following `Nextflow` flags: ```bash ---clusterOptions "-C mem256GB" --max_memory "256GB" +--clusterOptions "-C mem256GB -p node" --max_memory "256GB" ``` This raises the ceiling of available memory from the default of `128.GB` to `256.GB`. -Rackham has nodes with 128GB, 256GB and 1TB memory available. +`rackham` has nodes with 128GB, 256GB and 1TB memory available. Note that each job will still start with the same request as normal, but restarted attempts with larger requests will be able to request greater amounts of memory. @@ -53,15 +58,15 @@ All jobs will be submitted to fat nodes using this method, so it's only for use The UPPMAX nf-core configuration profile uses the `hostname` of the active environment to automatically apply the following resource limits: +* `rackham` + * cpus available: 20 cpus + * memory available: 125 GB * `bianca` * cpus available: 16 cpus * memory available: 109 GB * `irma` * cpus available: 16 cpus * memory available: 250 GB -* `rackham` - * cpus available: 20 cpus - * memory available: 125 GB ## Development config @@ -74,14 +79,197 @@ It is not suitable for use with real data. To use it, submit with `-profile uppmax,devel`. -## Running on Bianca +## Running on bianca -For security reasons, there is no internet access on Bianca so you can't download from or upload files to the cluster directly. Before running a nf-core pipeline on Bianca you will first have to download the pipeline and singularity images needed elsewhere and transfer them via the wharf area to your Bianca project. +> :warning: For more information, please follow the following guides: +> +> * [UPPMAX `bianca` user guide](http://uppmax.uu.se/support/user-guides/bianca-user-guide/). +> * [nf-core guide for running offline](https://nf-co.re/usage/offline) +> * [nf-core `tools` guide for downloading pipelines for offline use](https://nf-co.re/tools#downloading-pipelines-for-offline-use). +> * [UPPMAX `Singularity` guide](https://www.uppmax.uu.se/support-sv/user-guides/singularity-user-guide/). -You can follow the guide for downloading pipelines [for offline use](https://nf-co.re/tools#downloading-pipelines-for-offline-use). Note that you will have to download the singularity images as well. +For security reasons, there is no internet access on `bianca` so you can't download from or upload files to the cluster directly. +Before running a nf-core pipeline on `bianca` you will first have to download the pipeline and singularity images needed elsewhere and transfer them via the `wharf` area to your own `bianca` project. -Next transfer the pipeline and the singularity images to your project. 
Before running the pipeline you will have to indicate to nextflow where the singularity images are located by setting `NXF_SINGULARITY_CACHEDIR` : +In this guide, we use `rackham` to download and transfer files to the `wharf` area, but it can also be done on your own computer. +If you use `rackham` to download the pipeline and the singularity containers, we recommend using an interactive session (cf [interactive guide](https://www.uppmax.uu.se/support/faq/running-jobs-faq/how-can-i-run-interactively-on-a-compute-node/)), which is what we do in the following guide. -`export NXF_SINGULARITY_CACHEDIR=Your_Location_For_The_Singularity_directory/.` +### Download and install Nextflow -You should now be able to run your nf-core pipeline on bianca. +You can use the `Nextflow` UPPMAX provided `module`, but if necessary, you can also download a more recent version. + +```bash +# Connect to bianca +$ ssh -A -@bianca.uppmax.uu.se + +# See the available versions for the module +module spider Nextflow + +# Load a specific version of the Nextflow module +module load bioinfo-tools Nextflow/` +``` + +```bash +# Connect to rackham +$ ssh -X @rackham.uppmax.uu.se +# Or stay in your terminal + +# Download the nextflow-all bundle +$ wget https://github.com/nextflow-io/nextflow/releases/download/v/nextflow--all + +# Connect to the wharf area using sftp +$ sftp -@bianca-sftp.uppmax.uu.se:- + +# Transfer nextflow to the wharf area +sftp> put nextflow--all . + +# Exit sftp +$ exit + +# Connect to bianca +$ ssh -A -@bianca.uppmax.uu.se + +# Go to your project +$ cd /castor/project/proj_nobackup + +# Make folder for Nextflow +$ mkdir tools +$ mkdir tools/nextflow + +# Move Nextflow from the wharf area to its directory +$ mv /castor/project/proj_nobackup/wharf//-/nextflow--all /castor/project/proj_nobackup/tools/nextflow + +# Establish permission +$ chmod a+x /castor/project/proj_nobackup/tools/nextflow/nextflow--all + +# If you want other people to use it +# Be sure that your group has rights to the directory as well +$ chown -R . /castor/project/proj_nobackup/tools/nextflow/nextflow--all + +# Make a link to it +$ ln -s /castor/project/proj_nobackup/tools/nextflow/nextflow--all /castor/project/proj_nobackup/tools/nextflow/nextflow + +# And every time you're launching Nextflow, don't forget to export the following ENV variables +# Or add them to your .bashrc file +$ export NXF_HOME=/castor/project/proj/nobackup/tools/nextflow/ +$ export PATH=${NXF_HOME}:${PATH} +$ export NXF_TEMP=$SNIC_TMP +$ export NXF_LAUNCHER=$SNIC_TMP +$ export NXF_SINGULARITY_CACHEDIR=/castor/project/proj_nobackup/singularity-images +``` + +### Install nf-core tools + +You can use the `nf-core` UPPMAX provided `module`, but if necessary, you can also download a more recent version. 
+ +```bash +# Connect to rackham +$ ssh -X @rackham.uppmax.uu.se + +# See the available versions for the module +module spider nf-core + +# Load a specific version of the nf-core module +module load bioinfo-tools nf-core/` +``` + +```bash +# Connect to rackham +$ ssh -X @rackham.uppmax.uu.se +# Or stay in your terminal + +# Install the latest pip version +$ pip3 install --upgrade --force-reinstall git+https://github.com/nf-core/tools.git@dev --user +``` + +### Download and transfer a nf-core pipeline + +```bash +# Connect to rackham +$ ssh -X @rackham.uppmax.uu.se +# Or stay in your terminal + +# Open an interactive session (if you are on rackham) +$ interactive + +# Download a pipeline with the singularity images +$ nf-core download -r -s --compress none + +# If necessary, extra singularity images can be download separately +# For example, if you downloaded nf-core/sarek, you will need extra images for annotation +# Here we download the nf-core/sarek GRCh38 specific images +$ singularity pull --name nfcore-sareksnpeff-2.7.GRCh38.img docker://nfcore/sareksnpeff:2.7.GRCh38 +$ singularity pull --name nfcore-sarekvep-2.7.GRCh38.img docker://nfcore/sarekvep:2.7.GRCh38 + +# Which can then be moved into the nf-core/sarek download folder +$ mv *.img nf-core-sarek-2.7/singularity-images/. + +# Connect to the wharf area using sftp +$ sftp -@bianca-sftp.uppmax.uu.se:- + +# Transfer folder from rackham to the wharf area +sftp> put -r nf-core-- . + +# The archives will be in the wharf folder in your user home on your bianca project + +# Connect to bianca +$ ssh -A -@bianca.uppmax.uu.se + +# Go to your project +$ cd /castor/project/proj_nobackup + +# Make and go into a nf-core directory (where you will store all nf-core pipelines') +$ mkdir nf-core +$ cd nf-core + +# Move the folder from the wharf area to the project +$ cp /castor/project/proj_nobackup/wharf//-/nf-core-- . + +# If you want other people to use it, +# Be sure that your group has rights to the directory as well +$ chown -R . nf-core-- + +# Make a symbolic link to the extracted repository +$ ln -s nf-core-- nf-core--default +``` + +The principle is to have every member of your project to be able to use the same `nf-core/` version at the same time. +So every member of the project who wants to use `nf-core/` will need to do: + +```bash +# Connect to bianca +$ ssh -A -@bianca.uppmax.uu.se + +# Go to your user directory +$ cd /home/ + +# Make a symbolic link to the default nf-core/ +$ ln -s /castor/project/proj_nobackup/nf-core/nf-core--default nf-core- +``` + +And then `nf-core/` can be used with: + +```bash +# run on bianca +$ nextflow run ~/ -profile uppmax --project --genome ... +``` + +## Update a pipeline + +To update, repeat the same steps as for installing and update the link. + +```bash +# Connect to bianca (Connect to rackham first if needed) +$ ssh -A -@bianca.uppmax.uu.se + +# Go to the nf-core directory in your project +$ cd /castor/project/proj_nobackup/nf-core + +# Remove link +$ unlink nf-core--default + +# Link to new nf-core/ version +$ ln -s nf-core-- nf-core--default +``` + +You can for example keep a `nf-core--default` version that you are sure is working, an make a link for a `nf-core--testing` or `nf-core--development`. diff --git a/docs/wcm.md b/docs/wcm.md new file mode 100644 index 0000000..0785829 --- /dev/null +++ b/docs/wcm.md @@ -0,0 +1,24 @@ +# nf-core/configs: Weill Cornell Medicine Configuration + +All nf-core pipelines have been successfully configured for use on the panda cluster at the WCM. 
+ +To use, run the pipeline with `-profile wcm`. This will download and launch the [`wcm.config`](../conf/wcm.config) which has been pre-configured with a setup suitable for the WCM slurm cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +## Running the workflow on the Pasteur cluster + +Nextflow is not installed by default on the WCM cluster. + +- Install Nextflow : [here](https://www.nextflow.io/docs/latest/getstarted.html#) + +Nextflow manages each process as a separate job that is submitted to the cluster by using the `sbatch` command. +Nextflow shouldn't run directly on a login node but on a compute node or lab-specific interactive server when configured as a submit host. + +1. Run nextflow on a compute node or interactive server with submit host capability: + +```bash +# Run nextflow workflow +nextflow run \\ +nf-core/chipseq \\ +-resume \\ +-profile test,wcm +``` diff --git a/nfcore_custom.config b/nfcore_custom.config index 429e3c1..9b4fbda 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -15,6 +15,7 @@ profiles { bi { includeConfig "${params.custom_config_base}/conf/bi.config" } bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" } binac { includeConfig "${params.custom_config_base}/conf/binac.config" } + biohpc_gen { includeConfig "${params.custom_config_base}/conf/biohpc_gen.config" } cbe { includeConfig "${params.custom_config_base}/conf/cbe.config" } ccga_dx { includeConfig "${params.custom_config_base}/conf/ccga_dx.config" } ccga_med { includeConfig "${params.custom_config_base}/conf/ccga_med.config" } @@ -23,7 +24,10 @@ profiles { crick { includeConfig "${params.custom_config_base}/conf/crick.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } + eddie { includeConfig "${params.custom_config_base}/conf/eddie.config" } + eva { includeConfig "${params.custom_config_base}/conf/eva.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" } + ifb_core { includeConfig "${params.custom_config_base}/conf/ifb_core.config" } imperial { includeConfig "${params.custom_config_base}/conf/imperial.config" } imperial_mb { includeConfig "${params.custom_config_base}/conf/imperial_mb.config" } genotoul { includeConfig "${params.custom_config_base}/conf/genotoul.config" } @@ -39,12 +43,14 @@ profiles { pasteur { includeConfig "${params.custom_config_base}/conf/pasteur.config" } phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" } prince { includeConfig "${params.custom_config_base}/conf/prince.config" } + sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} seg_globe { includeConfig "${params.custom_config_base}/conf/seg_globe.config"} shh { includeConfig "${params.custom_config_base}/conf/shh.config" } uct_hpc { includeConfig "${params.custom_config_base}/conf/uct_hpc.config" } uppmax { includeConfig "${params.custom_config_base}/conf/uppmax.config" } utd_ganymede { includeConfig "${params.custom_config_base}/conf/utd_ganymede.config" } uzh { includeConfig "${params.custom_config_base}/conf/uzh.config" } + jax { includeConfig "${params.custom_config_base}/conf/jax.config" } } // If user hostnames contain one of these substring and they are diff --git a/pipeline/eager.config b/pipeline/eager.config index 9242ecb..2827496 100644 --- 
a/pipeline/eager.config +++ b/pipeline/eager.config @@ -11,5 +11,5 @@ profiles { shh { includeConfig "${params.custom_config_base}/conf/pipeline/eager/shh.config" } mpcdf { includeConfig "${params.custom_config_base}/conf/pipeline/eager/mpcdf.config" } - + eva { includeConfig "${params.custom_config_base}/conf/pipeline/eager/eva.config" } } diff --git a/pipeline/rnaseq.config b/pipeline/rnaseq.config new file mode 100644 index 0000000..1a27463 --- /dev/null +++ b/pipeline/rnaseq.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/rnaseq custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/rnaseq folder and imported + * under a profile name here. + */ + +profiles { + eddie { includeConfig "${params.custom_config_base}/conf/pipeline/rnaseq/eddie.config" } +} diff --git a/pipeline/sarek.config b/pipeline/sarek.config index 4ba1f43..57d7bdf 100644 --- a/pipeline/sarek.config +++ b/pipeline/sarek.config @@ -14,4 +14,5 @@ profiles { icr_davros { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/icr_davros.config" } cfc { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } cfc_dev { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/cfc.config" } + eddie { includeConfig "${params.custom_config_base}/conf/pipeline/sarek/eddie.config" } } \ No newline at end of file