diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5fb2601..0609a85 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: needs: test_all_profiles strategy: matrix: - profile: ['awsbatch', 'bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'ccga', 'cfc', 'crick', 'denbi_qbic', 'genotoul', 'genouest', 'gis', 'hebbe', 'kraken', 'munin', 'pasteur', 'phoenix', 'prince', 'shh', 'uct_hex', 'uppmax', 'uzh'] + profile: ['awsbatch', 'bi','bigpurple', 'binac', 'cbe', 'ccga_dx', 'ccga_med', 'cfc', 'cfc_dev', 'crick', 'denbi_qbic', 'ebc', 'genotoul', 'genouest', 'gis', 'google', 'hebbe', 'kraken', 'munin', 'pasteur', 'phoenix', 'prince', 'shh', 'uct_hex', 'uppmax', 'utd_ganymede', 'uzh'] steps: - uses: actions/checkout@v1 - name: Install Nextflow @@ -26,4 +26,5 @@ jobs: - name: Check ${{ matrix.profile }} profile env: SCRATCH: '~' + NXF_GLOBAL_CONFIG: awsbatch.config run: nextflow run ${GITHUB_WORKSPACE}/configtest.nf --custom_config_base=${GITHUB_WORKSPACE} -profile ${{ matrix.profile }} diff --git a/README.md b/README.md index 0e58b09..e5b199c 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ A repository for hosting Nextflow configuration files containing custom paramete * [Documentation](#documentation) * [Uploading to `nf-core/configs`](#uploading-to-nf-coreconfigs) * [Adding a new pipeline-specific config](#adding-a-new-pipeline-specific-config) + * [Pipeline-specific institutional documentation](#pipeline-specific-institutional-documentation) * [Pipeline-specific documentation](#pipeline-specific-documentation) * [Enabling pipeline-specific configs within a pipeline](#enabling-pipeline-specific-configs-within-a-pipeline) * [Create the pipeline-specific `nf-core/configs` files](#create-the-pipeline-specific-nf-coreconfigs-files) @@ -95,19 +96,21 @@ Currently documentation is available for the following systems: * [AWSBATCH](docs/awsbatch.md) * [BIGPURPLE](docs/bigpurple.md) +* [BI](docs/bi.md) * 
[BINAC](docs/binac.md) * [CBE](docs/cbe.md) -* [CCGA](docs/ccga.md) * [CCGA_DX](docs/ccga_dx.md) * [CCGA_MED](docs/ccga_med.md) -* [CFC](docs/binac.md) +* [CFC](docs/cfc.md) * [CRICK](docs/crick.md) * [CZBIOHUB_AWS](docs/czbiohub.md) * [CZBIOHUB_AWS_HIGHPRIORITY](docs/czbiohub.md) * [DENBI_QBIC](docs/denbi_qbic.md) +* [EBC](docs/ebc.md) * [GENOTOUL](docs/genotoul.md) * [GENOUEST](docs/genouest.md) * [GIS](docs/gis.md) +* [GOOGLE](docs/google.md) * [HEBBE](docs/hebbe.md) * [KRAKEN](docs/kraken.md) * [MUNIN](docs/munin.md) @@ -117,6 +120,7 @@ Currently documentation is available for the following systems: * [SHH](docs/shh.md) * [UCT_HEX](docs/uct_hex.md) * [UPPMAX](docs/uppmax.md) +* [UTD_GANYMEDE](docs/utd_ganymede.md) * [UZH](docs/uzh.md) ### Uploading to `nf-core/configs` @@ -157,18 +161,28 @@ Each configuration file will add new params and overwrite the params already exi Note that pipeline-specific configs are not required and should only be added if needed. -### Pipeline-specific documentation +### Pipeline-specific institutional documentation -Currently documentation is available for the following pipeline within the specific profile: +Currently documentation is available for the following pipelines within specific profiles: * ampliseq * [BINAC](docs/pipeline/ampliseq/binac.md) + * [UPPMAX](docs/pipeline/ampliseq/uppmax.md) * eager * [SHH](docs/pipeline/eager/shh.md) +* rnafusion + * [MUNIN](docs/pipeline/rnafusion/munin.md) * sarek * [MUNIN](docs/pipeline/sarek/munin.md) * [UPPMAX](docs/pipeline/sarek/uppmax.md) +### Pipeline-specific documentation + +Currently documentation is available for the following pipeline: + +* viralrecon + * [genomes](docs/pipeline/viralrecon/genomes.md) + ### Enabling pipeline-specific configs within a pipeline :warning: **This has to be done on a fork of the `nf-core/` repository.** diff --git a/conf/bi.config b/conf/bi.config new file mode 100644 index 0000000..f9dfb27 --- /dev/null +++ b/conf/bi.config @@ -0,0 +1,18 @@ +params{ 
+ config_profile_description = 'Boehringer Ingelheim internal profile provided by nf-core/configs.' + config_profile_contact = 'Alexander Peltzer (@apeltzer)' + config_profile_url = 'https://www.boehringer-ingelheim.com/' +} +params.globalConfig = determine_global_config() + +includeConfig params.globalConfig + +def determine_global_config() { + if( System.getenv('NXF_GLOBAL_CONFIG') == null) + { + def errorMessage = "ERROR: Environment variable NXF_GLOBAL_CONFIG is missing. Set it to point to global.config file." + System.err.println(errorMessage) + throw new Exception(errorMessage) + } + return System.getenv('NXF_GLOBAL_CONFIG') +} diff --git a/conf/cbe.config b/conf/cbe.config index ea24e5b..5888202 100755 --- a/conf/cbe.config +++ b/conf/cbe.config @@ -14,7 +14,7 @@ process { singularity { enabled = true - cacheDir = '/scratch-cbe/shared/containers' + cacheDir = '/resources/containers' } params { diff --git a/conf/ccga.config b/conf/ccga.config deleted file mode 100644 index 6163626..0000000 --- a/conf/ccga.config +++ /dev/null @@ -1,41 +0,0 @@ -//Profile config names for nf-core/configs -params { - config_profile_description = 'CCGA cluster profile provided by nf-core/configs.' 
- config_profile_contact = 'Marc Hoeppner (@marchoeppner)' - config_profile_url = 'https://www.ccga.uni-kiel.de/' -} - -/* - * ------------------------------------------------- - * Nextflow config file for CCGA cluster in Kiel - * ------------------------------------------------- - */ - -singularity { - enabled = true - runOptions = "-B /ifs -B /scratch -B /work_beegfs" - cacheDir = "/ifs/data/nfs_share/ikmb_repository/singularity_cache/" -} - -executor { - queueSize=100 -} - -process { - - // Global process config - executor = 'slurm' - queue = 'ikmb_a' - - clusterOptions = { "--qos=ikmb_a" } - -} - -params { - // illumina iGenomes reference file paths on RZCluster - igenomes_base = '/ifs/data/nfs_share/ikmb_repository/references/iGenomes/references/' - saveReference = true - max_memory = 128.GB - max_cpus = 16 - max_time = 120.h -} diff --git a/conf/cfc.config b/conf/cfc.config index 763dcf0..ffe0395 100644 --- a/conf/cfc.config +++ b/conf/cfc.config @@ -1,7 +1,7 @@ //Profile config names for nf-core/configs params { config_profile_description = 'QBiC Core Facility cluster profile provided by nf-core/configs.' - config_profile_contact = 'Alexander Peltzer (@apeltzer)' + config_profile_contact = 'Gisela Gabernet (@ggabernet)' config_profile_url = 'http://qbic.uni-tuebingen.de/' } @@ -13,6 +13,8 @@ singularity { process { beforeScript = 'module load devel/singularity/3.4.2' executor = 'slurm' + queue = { task.memory > 60.GB || task.cpus > 20 ? 
'qbic' : 'compute' } + scratch = 'true' } weblog{ @@ -22,7 +24,7 @@ weblog{ params { igenomes_base = '/nfsmounts/igenomes' - max_memory = 498.GB - max_cpus = 20 + max_memory = 1999.GB + max_cpus = 128 max_time = 140.h } diff --git a/conf/cfc_dev.config b/conf/cfc_dev.config new file mode 100644 index 0000000..6190314 --- /dev/null +++ b/conf/cfc_dev.config @@ -0,0 +1,29 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'QBiC Core Facility cluster dev profile without container cache provided by nf-core/configs.' + config_profile_contact = 'Gisela Gabernet (@ggabernet)' + config_profile_url = 'http://qbic.uni-tuebingen.de/' +} + +singularity { + enabled = true +} + +process { + beforeScript = 'module load devel/singularity/3.4.2' + executor = 'slurm' + queue = { task.memory > 60.GB || task.cpus > 20 ? 'qbic' : 'compute' } + scratch = 'true' +} + +weblog{ + enabled = true + url = 'https://services.qbic.uni-tuebingen.de/flowstore/workflows' +} + +params { + igenomes_base = '/nfsmounts/igenomes' + max_memory = 1999.GB + max_cpus = 128 + max_time = 140.h +} \ No newline at end of file diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config index 9d17a48..b32fb6d 100644 --- a/conf/czbiohub_aws.config +++ b/conf/czbiohub_aws.config @@ -50,6 +50,7 @@ params { // No final slash because it's added later gencode_base = "s3://czbiohub-reference/gencode" transgenes_base = "s3://czbiohub-reference/transgenes" + refseq_base = "s3://czbiohub-reference/ncbi/genomes/refseq/" // AWS configurations awsregion = "us-west-2" @@ -79,6 +80,12 @@ params { transcript_fasta = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa" star = "${params.gencode_base}/mouse/vM21/STARIndex/" } + 'AaegL5.0' { + fasta = "${params.refseq_base}/invertebrate/Aedes_aegypti/GCF_002204515.2_AaegL5.0/nf-core--rnaseq/reference_genome/GCF_002204515.2_AaegL5.0_genomic.fna" + gtf = 
"${params.refseq_base}/invertebrate/Aedes_aegypti/GCF_002204515.2_AaegL5.0/nf-core--rnaseq/reference_genome/GCF_002204515.2_AaegL5.0_genomic.gtf" + bed = "${params.refseq_base}/invertebrate/Aedes_aegypti/GCF_002204515.2_AaegL5.0/nf-core--rnaseq/reference_genome/GCF_002204515.2_AaegL5.0_genomic.bed" + star = "${params.refseq_base}/invertebrate/Aedes_aegypti/GCF_002204515.2_AaegL5.0/nf-core--rnaseq/reference_genome/star/" + } } transgenes { diff --git a/conf/ebc.config b/conf/ebc.config new file mode 100644 index 0000000..4df0596 --- /dev/null +++ b/conf/ebc.config @@ -0,0 +1,25 @@ + //Profile config names for nf-core/configs + params { + config_profile_description = 'Generic Estonian Biocentre profile provided by nf-core/configs.' + config_profile_contact = 'Marcel Keller (@marcel-keller)' + config_profile_url = 'https://genomics.ut.ee/en/about-us/estonian-biocentre' + } + + cleanup = true + + conda { + cacheDir = '/ebc_data/nf-core/conda' + } + process { + executor = 'slurm' + conda = "$baseDir/environment.yml" + beforeScript = 'module load nextflow' + } + executor { + queueSize = 16 + } + params { + max_memory = 12.GB + max_cpus = 20 + max_time = 120.h + } diff --git a/conf/google.config b/conf/google.config new file mode 100644 index 0000000..4333d86 --- /dev/null +++ b/conf/google.config @@ -0,0 +1,21 @@ +// Nextflow config file for running on Google Cloud Life Sciences +params { + config_profile_description = 'Google Cloud Life Sciences Profile' + config_profile_contact = 'Evan Floden, Seqera Labs (@evanfloden)' + config_profile_url = 'https://cloud.google.com/life-sciences' + + google_zone = 'europe-west2-c' + google_bucket = false + google_debug = false + google_preemptible = true +} + +process.executor = 'google-lifesciences' +google.zone = params.google_zone +google.lifeSciences.debug = params.google_debug +workDir = params.google_bucket +google.lifeSciences.preemptible = params.google_preemptible +if (google.lifeSciences.preemptible) { + 
process.errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' } + process.maxRetries = 5 +} \ No newline at end of file diff --git a/conf/pipeline/ampliseq/uppmax.config b/conf/pipeline/ampliseq/uppmax.config new file mode 100644 index 0000000..36a1c3b --- /dev/null +++ b/conf/pipeline/ampliseq/uppmax.config @@ -0,0 +1,18 @@ +// Profile config names for nf-core/configs + +params { + // Specific nf-core/configs params + config_profile_contact = 'Daniel Lundin (daniel.lundin@lnu.se)' + config_profile_description = 'nf-core/ampliseq UPPMAX profile provided by nf-core/configs' +} + +// Specific nf-core/ampliseq process configuration: withName selectors only apply inside the process scope +process { + withName: make_SILVA_132_16S_classifier { + clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" } + } + + withName: classifier { + clusterOptions = { "-A $params.project -C fat -p node -N 1 ${params.clusterOptions ?: ''}" } + } +} diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index a34f0e2..94a0403 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -5,6 +5,10 @@ params { config_profile_contact = 'James Fellows Yates (@jfy133)' config_profile_description = 'nf-core/eager SHH profile provided by nf-core/configs' igenomes_base = "/projects1/public_data/igenomes/" + + // default BWA + bwaalnn = 0.04 + bwaalnl = 32 } // Specific nf-core/eager process configuration @@ -17,3 +21,27 @@ process { queue = { task.memory > 756.GB ? 'supercruncher' : 'long' } } } + +profiles { + pathogen_loose { + params { + config_profile_description = 'Pathogen (loose) MPI-SHH profile, provided by nf-core/configs.' + bwaalnn = 0.01 + bwaalnl = 16 + } + } + pathogen_strict { + params { + config_profile_description = 'Pathogen (strict) MPI-SHH SDAG profile, provided by nf-core/configs.' + bwaalnn = 0.1 + bwaalnl = 32 + } + } + human { + params { + config_profile_description = 'Human MPI-SHH SDAG profile, provided by nf-core/configs.' 
+ bwaalnn = 0.01 + bwaalnl = 16500 + } + } +} diff --git a/conf/pipeline/rnafusion/munin.config b/conf/pipeline/rnafusion/munin.config new file mode 100644 index 0000000..b18f6ad --- /dev/null +++ b/conf/pipeline/rnafusion/munin.config @@ -0,0 +1,10 @@ +// rnafusion/munin specific profile config + +params { + max_cpus = 24 + max_memory = 256.GB + max_time = 72.h + + // Paths + genomes_base = '/data1/references/rnafusion/dev/' +} diff --git a/conf/pipeline/sarek/munin.config b/conf/pipeline/sarek/munin.config index e4413f2..039d117 100644 --- a/conf/pipeline/sarek/munin.config +++ b/conf/pipeline/sarek/munin.config @@ -1,4 +1,4 @@ -// Profile config names for nf-core/configs +// sarek/munin specific profile config params { // Specific nf-core/configs params @@ -6,17 +6,23 @@ params { config_profile_description = 'nf-core/sarek MUNIN profile provided by nf-core/configs' // Specific nf-core/sarek params - annotation_cache = true - pon = '/data1/PON/vcfs/BTB.PON.vcf.gz' - pon_index = '/data1/PON/vcfs/BTB.PON.vcf.gz.tbi' - snpEff_cache = '/data1/cache/snpEff/' - vep_cache = '/data1/cache/VEP/' + annotation_cache = true + cadd_cache = true + cadd_indels = '/data1/cache/CADD/v1.4/InDels.tsv.gz' + cadd_indels_tbi = '/data1/cache/CADD/v1.4/InDels.tsv.gz.tbi' + cadd_wg_snvs = '/data1/cache/CADD/v1.4/whole_genome_SNVs.tsv.gz' + cadd_wg_snvs_tbi = '/data1/cache/CADD/v1.4/whole_genome_SNVs.tsv.gz.tbi' + pon = '/data1/PON/vcfs/BTB.PON.vcf.gz' + pon_index = '/data1/PON/vcfs/BTB.PON.vcf.gz.tbi' + snpeff_cache = '/data1/cache/snpEff/' + vep_cache = '/data1/cache/VEP/' + vep_cache_version = '95' } // Specific nf-core/sarek process configuration process { withLabel:sentieon { - module = {params.sentieon ? 'sentieon/201808.05' : null} + module = {params.sentieon ? 'sentieon/201911.00' : null} container = {params.sentieon ? 
null : container} } } diff --git a/conf/pipeline/sarek/uppmax.config b/conf/pipeline/sarek/uppmax.config index 1e467dd..df2f3a2 100644 --- a/conf/pipeline/sarek/uppmax.config +++ b/conf/pipeline/sarek/uppmax.config @@ -9,9 +9,13 @@ params { igenomeIgnore = true genomes_base = params.genome == 'GRCh37' ? '/sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37' : '/sw/data/uppnex/ToolBox/hg38bundle' } - if (hostname ==~ "r.*") { + +def hostname = "hostname".execute().text.trim() + +if (hostname ==~ "r.*") { params.singleCPUmem = 6400.MB } + if (hostname ==~ "i.*") { params.singleCPUmem = 15.GB } diff --git a/conf/pipeline/viralrecon/genomes.config b/conf/pipeline/viralrecon/genomes.config new file mode 100644 index 0000000..d889460 --- /dev/null +++ b/conf/pipeline/viralrecon/genomes.config @@ -0,0 +1,20 @@ +/* + * ------------------------------------------------- + * nfcore/viralrecon custom profile Nextflow config file + * ------------------------------------------------- + * Defines viral reference genomes for all environments. + */ + +params { + // Genome reference file paths + genomes { + 'NC_045512.2' { + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz" + gff = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz" + } + 'MN908947.3' { + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.fna.gz" + gff = "https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.gff.gz" + } + } +} diff --git a/conf/shh.config b/conf/shh.config index a6106be..8defc1f 100644 --- a/conf/shh.config +++ b/conf/shh.config @@ -3,6 +3,11 @@ params { config_profile_description = 'Generic MPI-SHH cluster(s) profile provided by nf-core/configs.' 
config_profile_contact = 'James Fellows Yates (@jfy133), Maxime Borry (@Maxibor)' config_profile_url = 'https://shh.mpg.de' + max_memory = 256.GB + max_cpus = 32 + max_time = 720.h + //Illumina iGenomes reference file path + igenomes_base = "/projects1/public_data/igenomes/" } cleanup = true @@ -23,21 +28,17 @@ executor { queueSize = 16 } -params { - max_memory = 256.GB - max_cpus = 32 - max_time = 720.h - //Illumina iGenomes reference file path - igenomes_base = "/projects1/public_data/igenomes/" -} - profiles { cdag { - config_profile_description = 'MPI-SHH CDAG profile, provided by nf-core/configs.' + params { + config_profile_description = 'CDAG MPI-SHH profile, provided by nf-core/configs.' + } } sdag { - config_profile_description = 'MPI-SHH SDAG profile, provided by nf-core/configs.' + params { + config_profile_description = 'SDAG MPI-SHH profile, provided by nf-core/configs.' max_memory = 2.TB max_cpus = 128 + } } } diff --git a/conf/uppmax.config b/conf/uppmax.config index 9eb45fc..037070a 100644 --- a/conf/uppmax.config +++ b/conf/uppmax.config @@ -26,7 +26,7 @@ params { def hostname = "hostname".execute().text.trim() -if (hostname ==~ "b.*") { +if (hostname ==~ "b.*" || hostname ==~ "s.*") { params.max_memory = 109.GB } diff --git a/conf/utd_ganymede.config b/conf/utd_ganymede.config new file mode 100644 index 0000000..465deb5 --- /dev/null +++ b/conf/utd_ganymede.config @@ -0,0 +1,24 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'University of Texas at Dallas HPC cluster profile provided by nf-core/configs' + config_profile_contact = 'Edmund Miller(@emiller88)' + config_profile_url = 'http://docs.oithpc.utdallas.edu/' +} + +singularity { + enabled = true + envWhitelist='SINGULARITY_BINDPATH' + autoMounts = true +} + +process { + beforeScript = 'module load singularity/2.4.5' + executor = 'slurm' + queue = 'genomics' +} + +params { + max_memory = 32.GB + max_cpus = 16 + max_time = 48.h +} diff --git a/docs/bi.md 
b/docs/bi.md new file mode 100644 index 0000000..450d0df --- /dev/null +++ b/docs/bi.md @@ -0,0 +1,9 @@ +# nf-core/configs: BI Configuration + +All nf-core pipelines have been successfully configured for use at Boehringer Ingelheim. + +To use, run the pipeline with `-profile bi`. This will download and launch the [`bi.config`](../conf/bi.config) which has been pre-configured with a setup suitable for the BI systems. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +Before running the pipeline you will need to follow the internal documentation to run Nextflow on our systems. Similar to that, you need to set an environment variable `NXF_GLOBAL_CONFIG` to the path of the internal global config which is not publicly available here. + +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. diff --git a/docs/ccga.md b/docs/ccga.md deleted file mode 100644 index 798df29..0000000 --- a/docs/ccga.md +++ /dev/null @@ -1,18 +0,0 @@ -# nf-core/configs: CCGA Configuration - -Deployment and testing of nf-core pipelines at the CCGA cluster is on-going. - -To use, run the pipeline with `-profile ccga`. This will download and launch the [`ccga.config`](../conf/ccga.config) which has been pre-configured with a setup suitable for the CCGA cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. - -Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on the cluster. 
You can do this by issuing the commands below: - -```bash -## Load Nextflow and Singularity environment modules -module purge -module load IKMB -module load Java/1.8.0 -module load Nextflow -module load singularity3.1.0 -``` - ->NB: Access to the CCGA cluster is restricted to IKMB/CCGA employes. Please talk to Marc Hoeppner to get access (@marchoeppner). diff --git a/docs/cfc.md b/docs/cfc.md index a7f6beb..f96a47d 100644 --- a/docs/cfc.md +++ b/docs/cfc.md @@ -10,8 +10,10 @@ Before running the pipeline you will need to load Nextflow and Singularity using ## Load Nextflow and Singularity environment modules module purge module load devel/java_jdk/1.8.0u121 -module load qbic/singularity_slurm/3.0.3 +module load devel/singularity/3.4.2 ``` >NB: You will need an account to use the HPC cluster CFC in order to run the pipeline. If in doubt contact IT. >NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. + +The queues are set to be `qbic` or `compute` and will be chosen automatically for you depending on your job submission. diff --git a/docs/ebc.md b/docs/ebc.md new file mode 100644 index 0000000..71f2622 --- /dev/null +++ b/docs/ebc.md @@ -0,0 +1,11 @@ +# nf-core/configs: EBC Configuration + +All nf-core pipelines have been successfully configured for use on the [Estonian Biocentre (EBC)](https://genomics.ut.ee/en/about-us/estonian-biocentre) cluster at the [High Performance Computing Center](https://hpc.ut.ee/en) of the University of Tartu. +To use, run the pipeline with `-profile ebc`. This will download and launch the [`ebc.config`](../conf/ebc.config) which has been pre-configured with a setup suitable for the EBC cluster. Using this profile, currently, a conda environment containing all of the required software will be downloaded and stored in a central location. 
+ > :warning: You must install your own [conda binary](https://conda.io) to run nf-core pipelines in a conda environment. Running with singularity will be added soon. + +The profile will put a maximum job limit of 12 GB, 20 CPUs and a maximum wall time of 120 hours. + +NB: You will need an account to use the HPC cluster on EBC cluster in order to run the pipeline. If in doubt contact IT. +NB: Nextflow will need to submit the jobs via the SLURM scheduler to the HPC cluster and as such the commands above will have to be executed on one of the head nodes. If in doubt contact IT. diff --git a/docs/google.md b/docs/google.md new file mode 100644 index 0000000..9e8c521 --- /dev/null +++ b/docs/google.md @@ -0,0 +1,37 @@ +# nf-core/configs: Google Cloud Life Sciences Configuration + +To be used with the `google` profile by specifying the `-profile google` when running nf-core pipelines. + +![Google Cloud](images/google-cloud-logo.svg "https://cloud.google.com/life-sciences/docs/tutorials/nextflow") + +## Quick Start + +```bash +GOOGLE_APPLICATION_CREDENTIALS=<your_key>.json +NXF_MODE=google +nextflow run nf-core/rnaseq -profile test,google --google_bucket <gs://your_bucket/work> +``` + +### Required Parameters + +#### `--google_bucket` + +The Google Cloud Storage bucket location to be used as a Nextflow work directory. Can also be specified with (`-w gs://your_bucket/work`). + +### Optional Parameters + +#### `--google_zone` + +The Google zone where the computation is executed in Compute Engine VMs. Multiple zones can be provided separating them by a comma. Default (`europe-west2-c`). + +#### `--google_preemptible` + +Enables the usage of preemptible virtual machines with a retry error strategy for up to 5 retries. Default (`true`). + +#### `--google_debug` + +Copies the /google debug directory from the VM to the task bucket directory. Useful for debugging. Default (`false`). 
+ +## Cloud Life Sciences Setup + +Please refer to the [Google Cloud](https://cloud.google.com/life-sciences/docs/tutorials/nextflow) and [Nextflow](https://www.nextflow.io/docs/latest/google.html#cloud-life-sciences) documentation which describe how to setup the Google Cloud environment. diff --git a/docs/images/google-cloud-logo.svg b/docs/images/google-cloud-logo.svg new file mode 100644 index 0000000..18b0e48 --- /dev/null +++ b/docs/images/google-cloud-logo.svg @@ -0,0 +1,96 @@ + + + + + + + + + + +]> + + + + + + + + +Cloud_Logo_Nav + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/pipeline/ampliseq/binac.md b/docs/pipeline/ampliseq/binac.md index a63e021..fb3403e 100644 --- a/docs/pipeline/ampliseq/binac.md +++ b/docs/pipeline/ampliseq/binac.md @@ -6,7 +6,7 @@ Extra specific configuration for the ampliseq pipeline. To use, run the pipeline with `-profile binac`. -This will download and launch the ampliseq specific [`binac.config`](../conf/pipeline/ampliseq/binac.config) which has been pre-configured with a setup suitable for the BINAC cluster. +This will download and launch the ampliseq specific [`binac.config`](../../../conf/pipeline/ampliseq/binac.config) which has been pre-configured with a setup suitable for the BINAC cluster. Example: `nextflow run nf-core/ampliseq -profile binac` diff --git a/docs/pipeline/ampliseq/uppmax.md b/docs/pipeline/ampliseq/uppmax.md new file mode 100644 index 0000000..7061bdc --- /dev/null +++ b/docs/pipeline/ampliseq/uppmax.md @@ -0,0 +1,17 @@ +# nf-core/configs: uppmax ampliseq specific configuration + +Extra specific configuration for the ampliseq pipeline. + +## Usage + +To use, run the pipeline with `-profile uppmax`. + +This will download and launch the ampliseq specific [`uppmax.config`](../../../conf/pipeline/ampliseq/uppmax.config) which has been pre-configured with a setup suitable for the UPPMAX cluster. 
+ +Example: `nextflow run nf-core/ampliseq -profile uppmax` + +## ampliseq specific configurations for uppmax + +Specific configurations for UPPMAX has been made for ampliseq. + +* Makes sure that a fat node is allocated for training and applying a Bayesian classifier. diff --git a/docs/pipeline/eager/shh.md b/docs/pipeline/eager/shh.md index a0dc092..8377ded 100644 --- a/docs/pipeline/eager/shh.md +++ b/docs/pipeline/eager/shh.md @@ -6,7 +6,7 @@ Extra specific configuration for eager pipeline To use, run the pipeline with `-profile shh`. -This will download and launch the eager specific [`shh.config`](../conf/pipeline/eager/shh.config) which has been pre-configured with a setup suitable for the shh cluster. +This will download and launch the eager specific [`shh.config`](../../../conf/pipeline/eager/shh.config) which has been pre-configured with a setup suitable for the shh cluster. Example: `nextflow run nf-core/eager -profile shh` @@ -14,4 +14,5 @@ Example: `nextflow run nf-core/eager -profile shh` Specific configurations for shh has been made for eager. -* If running with the MALT module turned on, the MALT process by default will be sent to the long queue with a resource requirement minimum of 725GB and 64 cores. If this fails, the process will be tried once more only and sent to the supercruncher queue. The module will not retry after this, and pipeline will fail. +* If running with the MALT module turned on, the MALT process by default will be sent to the long queue with a resource requirement minimum of 725GB and 64 cores. If this fails, the process will be tried once more only and sent to the supercruncher queue. The module will not retry after this, and pipeline will fail. Note, this will only work on SDAG. +* Provides additional group specific profiles, which adapt the `bwa aln` mapping parameters to each context: `pathogen_loose` (`-l 16, -n 0.01`), `pathogen_strict` (`-l 32, -n 0.1`) and `human` (`-l 16500, -n 0.01`). 
diff --git a/docs/pipeline/rnafusion/munin.md b/docs/pipeline/rnafusion/munin.md new file mode 100644 index 0000000..607db9e --- /dev/null +++ b/docs/pipeline/rnafusion/munin.md @@ -0,0 +1,18 @@ +# nf-core/configs: MUNIN rnafusion specific configuration + +Extra specific configuration for rnafusion pipeline + +## Usage + +To use, run the pipeline with `-profile munin`. + +This will download and launch the rnafusion specific [`munin.config`](../../../conf/pipeline/rnafusion/munin.config) which has been pre-configured with a setup suitable for the `MUNIN` cluster. + +Example: `nextflow run nf-core/rnafusion -profile munin` + +## rnafusion specific configurations for MUNIN + +Specific configurations for `MUNIN` has been made for rnafusion. + +* `cpus`, `memory` and `time` max requirements. +* Paths to specific references and indexes diff --git a/docs/pipeline/sarek/munin.md b/docs/pipeline/sarek/munin.md index ce411c1..5f9511b 100644 --- a/docs/pipeline/sarek/munin.md +++ b/docs/pipeline/sarek/munin.md @@ -6,17 +6,22 @@ Extra specific configuration for sarek pipeline To use, run the pipeline with `-profile munin`. -This will download and launch the sarek specific [`munin.config`](../conf/pipeline/sarek/munin.config) which has been pre-configured with a setup suitable for the MUNIN cluster. +This will download and launch the sarek specific [`munin.config`](../../../conf/pipeline/sarek/munin.config) which has been pre-configured with a setup suitable for the `MUNIN` cluster. Example: `nextflow run nf-core/sarek -profile munin` ## Sarek specific configurations for MUNIN -Specific configurations for MUNIN has been made for sarek. +Specific configurations for `MUNIN` has been made for sarek. 
-* Params `annotation_cache` set to `true` -* Path to `snpEff_cache`: `/data1/cache/snpEff/` +* Params `annotation_cache` and `cadd_cache` set to `true` +* Params `vep_cache_version` set to `95` +* Path to `snpeff_cache`: `/data1/cache/snpEff/` * Path to `vep_cache`: `/data1/cache/VEP/` * Path to `pon`: `/data1/PON/vcfs/BTB.PON.vcf.gz` * Path to `pon_index`: `/data1/PON/vcfs/BTB.PON.vcf.gz.tbi` +* Path to `cadd_indels`: `/data1/cache/CADD/v1.4/InDels.tsv.gz` +* Path to `cadd_indels_tbi`: `/data1/cache/CADD/v1.4/InDels.tsv.gz.tbi` +* Path to `cadd_wg_snvs`: `/data1/cache/CADD/v1.4/whole_genome_SNVs.tsv.gz` +* Path to `cadd_wg_snvs_tbi`: `/data1/cache/CADD/v1.4/whole_genome_SNVs.tsv.gz.tbi` * Load module `Sentieon` for Processes with `sentieon` labels diff --git a/docs/pipeline/sarek/uppmax.md b/docs/pipeline/sarek/uppmax.md index ab8d85e..e612a41 100644 --- a/docs/pipeline/sarek/uppmax.md +++ b/docs/pipeline/sarek/uppmax.md @@ -6,7 +6,7 @@ Extra specific configuration for sarek pipeline To use, run the pipeline with `-profile uppmax`. -This will download and launch the sarek specific [`uppmax.config`](../conf/pipeline/sarek/uppmax.config) which has been pre-configured with a setup suitable for uppmax clusters. +This will download and launch the sarek specific [`uppmax.config`](../../../conf/pipeline/sarek/uppmax.config) which has been pre-configured with a setup suitable for uppmax clusters. 
Example: `nextflow run nf-core/sarek -profile uppmax` diff --git a/docs/pipeline/viralrecon/genomes.md b/docs/pipeline/viralrecon/genomes.md new file mode 100644 index 0000000..9577434 --- /dev/null +++ b/docs/pipeline/viralrecon/genomes.md @@ -0,0 +1,9 @@ +# nf-core/configs: viralrecon specific configuration + +Extra specific configuration for viralrecon pipeline + +## Usage + +Will be used automatically when running the pipeline with the shared configs in the nf-core/configs repository + +This will download and launch the viralrecon specific [`genomes.config`](../../../conf/pipeline/viralrecon/genomes.config) which has been pre-configured with custom genomes. diff --git a/docs/shh.md b/docs/shh.md index c780f21..6f61426 100644 --- a/docs/shh.md +++ b/docs/shh.md @@ -2,7 +2,7 @@ All nf-core pipelines have been successfully configured for use on the Department of Archaeogenetic's SDAG/CDAG clusters at the [Max Planck Institute for the Science of Human History (MPI-SHH)](http://shh.mpg.de). -To use, run the pipeline with `-profile ssh`. You can further with optimise submissions by specifying which cluster you are using with `-profile shh,sdag` or `-profile ssh,cdag`. This will download and launch the [`shh.config`](../conf/shh.config) which has been pre-configured with a setup suitable for the SDAG and CDAG clusters respectively. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. The image will currently be centrally stored here: +To use, run the pipeline with `-profile shh`. You can further optimise submissions by specifying which cluster you are using with `-profile shh,sdag` or `-profile shh,cdag`. This will download and launch the [`shh.config`](../conf/shh.config) which has been pre-configured with a setup suitable for the SDAG and CDAG clusters respectively. 
Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. The image will currently be centrally stored here: ```bash /projects1/singularity_scratch/cache/ @@ -10,7 +10,7 @@ To use, run the pipeline with `-profile ssh`. You can further with optimise subm however this will likely change to a read-only directory in the future that will be managed by the IT team. -This configuration will automatically choose the correct SLURM queue (`short`,`medium`,`long`) depending on the time and memory required by each process. `-profile ssh,sdag` additionally allows for submission of jobs to the `supercruncher` queue when a job's requested memory exceeds 756GB. +This configuration will automatically choose the correct SLURM queue (`short`,`medium`,`long`) depending on the time and memory required by each process. `-profile shh,sdag` additionally allows for submission of jobs to the `supercruncher` queue when a job's requested memory exceeds 756GB. >NB: You will need an account and VPN access to use the cluster at MPI-SHH in order to run the pipeline. If in doubt contact the IT team. >NB: Nextflow will need to submit the jobs via SLURM to the clusters and as such the commands above will have to be executed on one of the head nodes. If in doubt contact IT. diff --git a/docs/utd_ganymede.md b/docs/utd_ganymede.md new file mode 100644 index 0000000..81dd46a --- /dev/null +++ b/docs/utd_ganymede.md @@ -0,0 +1,18 @@ +# nf-core/configs: UTD Ganymede Configuration + +All nf-core pipelines have been successfully configured for use on the Ganymede HPC cluster at [The University of Texas at Dallas](https://www.utdallas.edu/). + +To use, run the pipeline with `-profile utd_ganymede`. This will download and launch the [`utd_ganymede.config`](../conf/utd_ganymede.config) which has been pre-configured with a setup suitable for the Ganymede HPC cluster.
Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +Before running the pipeline you will need to load Singularity using the environment module system on Ganymede. You can do this by issuing the commands below: + +```bash +## Singularity environment modules +module purge +module load singularity +``` + +All of the intermediate files required to run the pipeline will be stored in the `work/` directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the `results/` directory anyway. + +>NB: You will need an account to use the HPC cluster on Ganymede in order to run the pipeline. If in doubt contact GanymedeAdmins. +>NB: Nextflow will need to submit the jobs via SLURM to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact GanymedeAdmins. diff --git a/nfcore_custom.config b/nfcore_custom.config index a9c0eac..6349a15 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -11,20 +11,23 @@ //Please use a new line per include Config section to allow easier linting/parsing. Thank you.
profiles { awsbatch { includeConfig "${params.custom_config_base}/conf/awsbatch.config" } + bi { includeConfig "${params.custom_config_base}/conf/bi.config" } bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" } binac { includeConfig "${params.custom_config_base}/conf/binac.config" } cbe { includeConfig "${params.custom_config_base}/conf/cbe.config" } - ccga { includeConfig "${params.custom_config_base}/conf/ccga.config" } ccga_dx { includeConfig "${params.custom_config_base}/conf/ccga_dx.config" } ccga_med { includeConfig "${params.custom_config_base}/conf/ccga_med.config" } cfc { includeConfig "${params.custom_config_base}/conf/cfc.config" } + cfc_dev { includeConfig "${params.custom_config_base}/conf/cfc_dev.config" } crick { includeConfig "${params.custom_config_base}/conf/crick.config" } czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } czbiohub_aws_highpriority { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config"; includeConfig "${params.custom_config_base}/conf/czbiohub_aws_highpriority.config"} + ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } icr_davros { includeConfig "${params.custom_config_base}/conf/icr_davros.config" } genotoul { includeConfig "${params.custom_config_base}/conf/genotoul.config" } + google { includeConfig "${params.custom_config_base}/conf/google.config" } denbi_qbic { includeConfig "${params.custom_config_base}/conf/denbi_qbic.config" } genouest { includeConfig "${params.custom_config_base}/conf/genouest.config" } gis { includeConfig "${params.custom_config_base}/conf/gis.config" } @@ -37,6 +40,7 @@ profiles { shh { includeConfig "${params.custom_config_base}/conf/shh.config" } uct_hex { includeConfig "${params.custom_config_base}/conf/uct_hex.config" } uppmax { includeConfig "${params.custom_config_base}/conf/uppmax.config" } + utd_ganymede { includeConfig "${params.custom_config_base}/conf/utd_ganymede.config" } uzh { 
includeConfig "${params.custom_config_base}/conf/uzh.config" } } @@ -46,11 +50,14 @@ profiles { params { // This is a groovy map, not a nextflow parameter set hostnames = [ + binac: ['.binac.uni-tuebingen.de'], cbe: ['.cbe.vbc.ac.at'], + cfc: ['.hpc.uni-tuebingen.de'], crick: ['.thecrick.org'], icr_davros: ['.davros.compute.estate'], genotoul: ['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest: ['.genouest.org'], - uppmax: ['.uppmax.uu.se'] + uppmax: ['.uppmax.uu.se'], + utd_ganymede: ['ganymede.utdallas.edu'] ] } diff --git a/pipeline/ampliseq.config b/pipeline/ampliseq.config index 6009d68..9cb19f7 100644 --- a/pipeline/ampliseq.config +++ b/pipeline/ampliseq.config @@ -10,4 +10,5 @@ profiles { binac { includeConfig "${params.custom_config_base}/conf/pipeline/ampliseq/binac.config" } -} \ No newline at end of file + uppmax { includeConfig "${params.custom_config_base}/conf/pipeline/ampliseq/uppmax.config" } +} diff --git a/pipeline/rnafusion.config b/pipeline/rnafusion.config new file mode 100644 index 0000000..2d86d89 --- /dev/null +++ b/pipeline/rnafusion.config @@ -0,0 +1,13 @@ +/* + * ------------------------------------------------- + * nfcore/rnafusion custom profile Nextflow config file + * ------------------------------------------------- + * Config options for custom environments. + * Cluster-specific config options should be saved + * in the conf/pipeline/rnafusion folder and imported + * under a profile name here. 
+ */ + +profiles { + munin { includeConfig "${params.custom_config_base}/conf/pipeline/rnafusion/munin.config" } +} \ No newline at end of file diff --git a/pipeline/viralrecon.config b/pipeline/viralrecon.config new file mode 100644 index 0000000..76a15d1 --- /dev/null +++ b/pipeline/viralrecon.config @@ -0,0 +1,7 @@ +/* + * ------------------------------------------------- + * nfcore/viralrecon custom profile Nextflow config file + * ------------------------------------------------- + */ + +includeConfig "${params.custom_config_base}/conf/pipeline/viralrecon/genomes.config"