diff --git a/.gitignore b/.gitignore index 07c0144..8aa0735 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ work/ data/ results/ .DS_Store +*.code-workspace \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 918a846..9da38dd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,8 @@ install: - mkdir -p ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests env: - - NXF_VER='18.10.1' # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check, that it works + - NXF_VER='18.10.1' SCRATCH='~' # Specify a minimum NF version that should be tested and work. Set SCRATCH for prince.config. + - NXF_VER='' SCRATCH='~' # Plus: get the latest NF version and check, that it works. Set SCRATCH for prince.config. script: # Run the pipeline with the test profile and test remote config @@ -26,4 +26,5 @@ script: grep "{.*includeConfig.*[a-z]*\.config\"" ${TRAVIS_BUILD_DIR}/nfcore_custom.config | \ tr -s ' ' | \ cut -d " " -f 2 | \ - xargs -I {} nextflow run ${TRAVIS_BUILD_DIR}/configtest.nf -profile {} + grep -v "czbiohub_aws" | \ + xargs -I {} nextflow run ${TRAVIS_BUILD_DIR}/configtest.nf --custom_config_base=${TRAVIS_BUILD_DIR} -profile {} diff --git a/README.md b/README.md index eadd3bb..451c12f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ - - -# [nf-core/configs](https://github.com/nf-core/configs) +# ![nf-core/configs](docs/images/nfcore-configs_logo.png) [![Build Status](https://travis-ci.org/nf-core/configs.svg?branch=master)](https://travis-ci.org/nf-core/configs) @@ -84,21 +82,28 @@ See [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs Currently documentation is available for the following clusters: -* [BINAC](docs/binac.md) * [BIGPURPLE](docs/bigpurple.md) +* [BINAC](docs/binac.md) +* [CBE](docs/cbe.md) * [CCGA](docs/ccga.md) +* [CCGA_DX](/docs/ccga_dx.md) * [CFC](docs/binac.md) * [CRICK](docs/crick.md) +* [CZBIOHUB_AWS](docs/czbiohub.md) +* 
[CZBIOHUB_AWS_HIGHPRIORITY](docs/czbiohub.md) +* [GENOUEST](docs/genouest.md) * [GIS](docs/gis.md) * [HEBBE](docs/hebbe.md) +* [KRAKEN](docs/kraken.md) * [MENDEL](docs/mendel.md) * [MUNIN](docs/munin.md) +* [PASTEUR](docs/pasteur.md) * [PHOENIX](docs/phoenix.md) * [PRINCE](docs/prince.md) * [SHH](docs/shh.md) * [UCT_HEX](docs/uct_hex.md) -* [UPPMAX-DEVEL](docs/uppmax-devel.md) * [UPPMAX](docs/uppmax.md) +* [UPPMAX_DEVEL](docs/uppmax.md) * [UZH](docs/uzh.md) ### Uploading to `nf-core/configs` @@ -111,4 +116,4 @@ We will be notified automatically when you have created your pull request, and p ## Help -If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack). diff --git a/conf/binac.config b/conf/binac.config index 5df94bf..9447ca4 100644 --- a/conf/binac.config +++ b/conf/binac.config @@ -10,7 +10,7 @@ singularity { } process { - beforeScript = 'module load devel/singularity/3.0.1' + beforeScript = 'module load devel/singularity/3.0.3' executor = 'pbs' queue = 'short' } @@ -21,3 +21,8 @@ params { max_cpus = 28 max_time = 48.h } + +weblog{ + enabled = true + url = 'https://services.qbic.uni-tuebingen.de/flowstore/workflows' +} diff --git a/conf/cbe.config b/conf/cbe.config new file mode 100755 index 0000000..c303f92 --- /dev/null +++ b/conf/cbe.config @@ -0,0 +1,32 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'CLIP BATCH ENVIRONMENT (CBE) cluster profile provided by nf-core/configs' + config_profile_contact = 'Patrick Hüther (@phue)' + config_profile_url = 'http://www.gmi.oeaw.ac.at/' +} + +process { + executor = 'slurm' + module = 'singularity/3.2.1' + queue = 'c' +} + +singularity.enabled = true + +params { + target_qos = 'medium' + max_cpus = 36 + max_memory = 170.GB + igenomesIgnore = true +} + +if (params.target_qos == 'short') { + params.max_time = 8.h 
+ process.clusterOptions = '--qos short' +} else if (params.target_qos == 'medium') { + params.max_time = 2.d + process.clusterOptions = '--qos medium' +} else { + params.max_time = 14.d + process.clusterOptions = '--qos long' +} diff --git a/conf/ccga.config b/conf/ccga.config index f29db9c..6163626 100644 --- a/conf/ccga.config +++ b/conf/ccga.config @@ -2,15 +2,21 @@ params { config_profile_description = 'CCGA cluster profile provided by nf-core/configs.' config_profile_contact = 'Marc Hoeppner (@marchoeppner)' - config_profile_url = 'https://www.ikmb.uni-kiel.de/' + config_profile_url = 'https://www.ccga.uni-kiel.de/' } /* * ------------------------------------------------- - * Nextflow config file with environment modules for RZCluster in Kiel + * Nextflow config file for CCGA cluster in Kiel * ------------------------------------------------- */ +singularity { + enabled = true + runOptions = "-B /ifs -B /scratch -B /work_beegfs" + cacheDir = "/ifs/data/nfs_share/ikmb_repository/singularity_cache/" +} + executor { queueSize=100 } @@ -29,4 +35,7 @@ params { // illumina iGenomes reference file paths on RZCluster igenomes_base = '/ifs/data/nfs_share/ikmb_repository/references/iGenomes/references/' saveReference = true + max_memory = 128.GB + max_cpus = 16 + max_time = 120.h } diff --git a/conf/ccga_dx.config b/conf/ccga_dx.config new file mode 100644 index 0000000..1f92b0d --- /dev/null +++ b/conf/ccga_dx.config @@ -0,0 +1,38 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'CCGA DX cluster profile provided by nf-core/configs.' 
+ config_profile_contact = 'Marc Hoeppner (@marchoeppner)' + config_profile_url = 'https://www.ccga.uni-kiel.de/' +} + +/* + * ------------------------------------------------- + * Nextflow config file for CCGA cluster in Kiel + * ------------------------------------------------- + */ + +singularity { + enabled = true + runOptions = "-B /mnt" +} + +executor { + queueSize=100 +} + +process { + + // Global process config + executor = 'slurm' + queue = 'htc' + +} + +params { + // illumina iGenomes reference file paths on DX Cluster + igenomes_base = '/mnt/ld_ng_out/sukmb352/references/iGenomes/references/' + saveReference = true + max_memory = 250.GB + max_cpus = 20 + max_time = 240.h +} diff --git a/conf/cfc.config b/conf/cfc.config index dd69647..d5b1c3b 100644 --- a/conf/cfc.config +++ b/conf/cfc.config @@ -7,6 +7,7 @@ params { singularity { enabled = true + cacheDir = '/nfsmounts/container' } process { @@ -14,6 +15,11 @@ process { executor = 'slurm' } +weblog{ + enabled = true + url = 'https://services.qbic.uni-tuebingen.de/flowstore/workflows' +} + params { igenomes_base = '/nfsmounts/igenomes' max_memory = 60.GB diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config new file mode 100644 index 0000000..1dd8da2 --- /dev/null +++ b/conf/czbiohub_aws.config @@ -0,0 +1,129 @@ +/* + * ------------------------------------------------- + * Nextflow config file for Chan Zuckerberg Biohub + * ------------------------------------------------- + * Defines reference genomes, using iGenome paths + * Imported under the default 'standard' Nextflow + * profile in nextflow.config + */ + + //Profile config names for nf-core/configs + params { + config_profile_description = 'Chan Zuckerberg Biohub AWS Batch profile provided by nf-core/configs.' 
+ config_profile_contact = 'Olga Botvinnik (@olgabot)' + config_profile_url = 'https://www.czbiohub.org/' + } + +docker { + enabled = true +} + +process { + executor = 'awsbatch' + queue = 'default-971039e0-830c-11e9-9e0b-02c5b84a8036' + errorStrategy = 'ignore' +} + +workDir = "s3://czb-nextflow/intermediates/" + +aws.region = 'us-west-2' +executor.awscli = '/home/ec2-user/miniconda/bin/aws' +params.tracedir = './' + +params { + saveReference = true + + // Largest SPOT instances available on AWS: https://ec2instances.info/ + max_memory = 1952.GB + max_cpus = 96 + max_time = 240.h + + // Compatible with multiple versions of rnaseq pipeline + seq_center = "czbiohub" + seqCenter = "czbiohub" + + // illumina iGenomes reference file paths on CZ Biohub reference s3 bucket + // No final slash because it's added later + igenomes_base = "s3://czbiohub-reference/igenomes" + + // GENCODE (human + mouse) reference file paths on CZ Biohub reference s3 bucket + // No final slash because it's added later + gencode_base = "s3://czbiohub-reference/gencode" + transgenes_base = "s3://czbiohub-reference/transgenes" + + // AWS configurations + awsregion = "us-west-2" + awsqueue = "nextflow" + + igenomesIgnore = true + + fc_extra_attributes = 'gene_name' + fc_group_features = 'gene_id' + fc_group_features_type = 'gene_type' + + trim_pattern = '_+S\\d+' + + // GENCODE GTF and fasta files + genomes { + 'GRCh38' { + fasta = "${params.gencode_base}/human/v30/GRCh38.p12.genome.ERCC92.fa" + gtf = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gtf" + transcript_fasta = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa" + star = "${params.gencode_base}/human/v30/STARIndex/" + salmon_index = "${params.gencode_base}/human/v30/salmon_index/" + } + 'GRCm38' { + fasta = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa" + gtf = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gtf" + transcript_fasta = 
"${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa" + star = "${params.gencode_base}/mouse/vM21/STARIndex/" + } + } + + transgenes { + 'ChR2' { + fasta = "${params.transgenes_base}/ChR2/ChR2.fa" + gtf = "${params.transgenes_base}/ChR2/ChR2.gtf" + } + 'Cre' { + fasta = "${params.transgenes_base}/Cre/Cre.fa" + gtf = "${params.transgenes_base}/Cre/Cre.gtf" + } + 'ERCC' { + fasta = "${params.transgenes_base}/ERCC92/ERCC92.fa" + gtf = "${params.transgenes_base}/ERCC92/ERCC92.gtf" + } + 'GCaMP6m' { + fasta = "${params.transgenes_base}/GCaMP6m/GCaMP6m.fa" + gtf = "${params.transgenes_base}/GCaMP6m/GCaMP6m.gtf" + } + 'GFP' { + fasta = "${params.transgenes_base}/Gfp/Gfp.fa" + gtf = "${params.transgenes_base}/Gfp/Gfp.gtf" + } + 'NpHR' { + fasta = "${params.transgenes_base}/NpHR/NpHR.fa" + gtf = "${params.transgenes_base}/NpHR/NpHR.gtf" + } + 'RCaMP' { + fasta = "${params.transgenes_base}/RCaMP/RCaMP.fa" + gtf = "${params.transgenes_base}/RCaMP/RCaMP.gtf" + } + 'RGECO' { + fasta = "${params.transgenes_base}/RGECO/RGECO.fa" + gtf = "${params.transgenes_base}/RGECO/RGECO.gtf" + } + 'Tdtom' { + fasta = "${params.transgenes_base}/Tdtom/Tdtom.fa" + gtf = "${params.transgenes_base}/Tdtom/Tdtom.gtf" + } + 'Car-T' { + fasta = "${params.transgenes_base}/car-t/car-t.fa" + gtf = "${params.transgenes_base}/car-t/car-t.gtf" + } + 'zsGreen' { + fasta = "${params.transgenes_base}/zsGreen/zsGreen.fa" + gtf = "${params.transgenes_base}/zsGreen/zsGreen.gtf" + } + } +} diff --git a/conf/czbiohub_aws_highpriority.config b/conf/czbiohub_aws_highpriority.config new file mode 100644 index 0000000..5ab796a --- /dev/null +++ b/conf/czbiohub_aws_highpriority.config @@ -0,0 +1,12 @@ +/* + * ------------------------------------------------- + * Nextflow config file for Chan Zuckerberg Biohub + * ------------------------------------------------- + * Defines reference genomes, using iGenome paths + * Imported under the default 'standard' Nextflow + * profile in nextflow.config + */ + 
+process { + queue = 'highpriority-971039e0-830c-11e9-9e0b-02c5b84a8036' +} diff --git a/conf/genouest.config b/conf/genouest.config new file mode 100644 index 0000000..68d82a6 --- /dev/null +++ b/conf/genouest.config @@ -0,0 +1,23 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'The GenOuest cluster profile' + config_profile_contact = 'Anthony Bretaudeau (@abretaud)' + config_profile_url = 'https://www.genouest.org' +} + +singularity { + enabled = true + autoMounts = true + runOptions = '-B /scratch:/scratch -B /local:/local -B /db:/db' +} + +process { + executor = 'slurm' +} + +params { + igenomesIgnore = true + max_memory = 750.GB + max_cpus = 80 + max_time = 336.h +} diff --git a/conf/kraken.config b/conf/kraken.config new file mode 100644 index 0000000..59417e9 --- /dev/null +++ b/conf/kraken.config @@ -0,0 +1,24 @@ +//Profile config names for nf-core/configs +params { + config_profile_name = 'KRAKEN' + config_profile_description = 'Jenkins cluster provided by nf-core/configs.' 
+ config_profile_contact = 'Maxime Garcia or Johannes Alneberg' + config_profile_url = 'kraken.dyn.scilifelab.se' +} + +process { + executor = 'local' +} + +docker { + enabled = true + mountFlags = 'z' + fixOwnership = true +} + +params { + max_memory = 60.GB + max_cpus = 16 + max_time = 72.h + igenomes_base = '/share/igenomes/' +} diff --git a/conf/mendel.config b/conf/mendel.config index aee4ebc..e658c87 100644 --- a/conf/mendel.config +++ b/conf/mendel.config @@ -10,7 +10,7 @@ manifest { } process { - beforeScript = {'module load Singularity; module load Miniconda3'} + beforeScript = 'module load Miniconda3/4.6.7' executor = 'pbspro' clusterOptions = { "-P $params.project" } } diff --git a/conf/munin.config b/conf/munin.config index 5d95f01..f53def4 100644 --- a/conf/munin.config +++ b/conf/munin.config @@ -1,20 +1,21 @@ //Profile config names for nf-core/configs params { - config_profile_description = 'Big iron cluster profile provided by nf-core/configs.' + config_profile_description = 'MUNIN profile provided by nf-core/configs.' 
config_profile_contact = 'Szilveszter Juhos (@szilva)' config_profile_url = '' } process { executor = 'local' + maxForks = 46 } +// To use singularity, use nextflow run -profile munin,singularity singularity { enabled = true - autoMounts = true } -// To use docker instead of singularity, use nextflow run -profile munin,docker +// To use docker, use nextflow run -profile munin,docker docker { enabled = false mountFlags = 'z' @@ -22,11 +23,11 @@ docker { } params { - saveReference = true - - max_memory = 128.GB - max_cpus = 16 + // general params + max_memory = 752.GB + max_cpus = 46 max_time = 72.h - // illumina iGenomes reference file paths on UPPMAX - igenomes_base = '/data0/btb/references/igenomes/' + + // Local AWS iGenomes reference file paths on munin + igenomes_base = '/data1/references/igenomes/' } diff --git a/conf/pasteur.config b/conf/pasteur.config new file mode 100644 index 0000000..1d95131 --- /dev/null +++ b/conf/pasteur.config @@ -0,0 +1,23 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'The Institut Pasteur HPC cluster profile' + config_profile_contact = 'Remi Planel (@rplanel)' + config_profile_url = 'https://research.pasteur.fr/en/service/tars-cluster' +} + +singularity { + enabled = true + autoMounts = true + runOptions = '-B /local/scratch:/tmp' +} + +process { + executor = 'slurm' +} + +params { + igenomesIgnore = true + max_memory = 256.GB + max_cpus = 28 + max_time = 24.h +} diff --git a/conf/prince.config b/conf/prince.config index 6ae4ee8..1c2ea2c 100644 --- a/conf/prince.config +++ b/conf/prince.config @@ -1,4 +1,6 @@ singularityDir = "$SCRATCH/singularity_images_nextflow" +singularityModule = "singularity/3.2.1" +squashfsModule = "squashfs/4.3" params { config_profile_description = """ @@ -17,8 +19,8 @@ singularity { process { beforeScript = """ - module load singularity/3.1.0 - module load squashfs/4.3 + module load $singularityModule + module load $squashfsModule """ .stripIndent() executor = 
'slurm' diff --git a/conf/uppmax-devel.config b/conf/uppmax_devel.config similarity index 100% rename from conf/uppmax-devel.config rename to conf/uppmax_devel.config diff --git a/docs/cbe.md b/docs/cbe.md new file mode 100644 index 0000000..8a84294 --- /dev/null +++ b/docs/cbe.md @@ -0,0 +1,18 @@ +# nf-core/configs: CBE Configuration + +All nf-core pipelines have been successfully configured for use on the CLIP BATCH ENVIRONMENT (CBE) cluster at the Vienna BioCenter (VBC). + +To use, run the pipeline with `-profile cbe`. This will download and launch the [`cbe.config`](../conf/cbe.config) which has been pre-configured with a setup suitable for the CBE cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on CBE. You can do this by issuing the commands below: + +```bash +## Load Nextflow and Singularity environment modules +module purge +module load nextflow/19.04.0 +module load singularity/3.2.1 +``` + +>NB: You will need an account to use the HPC cluster on CBE in order to run the pipeline. If in doubt contact IT. + +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. diff --git a/docs/ccga.md b/docs/ccga.md new file mode 100644 index 0000000..1157cb3 --- /dev/null +++ b/docs/ccga.md @@ -0,0 +1,18 @@ +# nf-core/configs: CCGA Configuration + +Deployment and testing of nf-core pipelines at the CCGA cluster is on-going. + +To use, run the pipeline with `-profile ccga`. This will download and launch the [`ccga.config`](../conf/ccga.config) which has been pre-configured with a setup suitable for the CCGA cluster. 
Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on the cluster. You can do this by issuing the commands below: + +```bash +## Load Nextflow and Singularity environment modules +module purge +module load IKMB +module load Java/1.8.0 +module load Nextflow +module load singularity3.1.0 +``` + +>NB: Access to the CCGA cluster is restricted to IKMB/CCGA employees. Please talk to Marc Hoeppner to get access (@marchoeppner). diff --git a/docs/ccga_dx.md b/docs/ccga_dx.md new file mode 100644 index 0000000..0cea81e --- /dev/null +++ b/docs/ccga_dx.md @@ -0,0 +1,9 @@ +# nf-core/configs: CCGA DX Configuration + +Deployment and testing of nf-core pipelines at the CCGA DX cluster is on-going. + +To use, run the pipeline with `-profile ccga_dx`. This will download and launch the [`ccga_dx.config`](../conf/ccga_dx.config) which has been pre-configured with a setup suitable for the CCGA DX cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +Before running the pipeline you will need to have Nextflow installed. + +>NB: Access to the CCGA DX cluster is restricted to IKMB/CCGA employees. Please talk to Marc Hoeppner to get access (@marchoeppner). 
diff --git a/docs/cfc.md b/docs/cfc.md index e1cccac..5cf87d1 100644 --- a/docs/cfc.md +++ b/docs/cfc.md @@ -10,7 +10,7 @@ Before running the pipeline you will need to load Nextflow and Singularity using ## Load Nextflow and Singularity environment modules module purge module load devel/java_jdk/1.8.0u121 -module load qbic/singularity_slurm/3.0.1 +module load qbic/singularity_slurm/3.0.3 ``` diff --git a/docs/czbiohub.md b/docs/czbiohub.md new file mode 100644 index 0000000..0120283 --- /dev/null +++ b/docs/czbiohub.md @@ -0,0 +1,128 @@ +# nf-core/configs: CZ Biohub Configuration + +All nf-core pipelines have been successfully configured for use on AWS Batch at the Chan Zuckerberg Biohub. + +To use, run the pipeline with `-profile czbiohub_aws`. This will download and launch the [`czbiohub_aws.config`](../conf/czbiohub_aws.config) which has been pre-configured with a setup suitable for AWS Batch. Using this profile, a Docker image containing all of the required software will be pulled and run on AWS Batch. + +Ask Olga (olga.botvinnik@czbiohub.org) if you have any questions! + +## Run the pipeline from a small AWS EC2 Instance + +The pipeline will monitor and submit jobs to AWS Batch on your behalf. To ensure that the pipeline is successful, it will need to be run from a computer that has a constant internet connection. Unfortunately for us, Biohub has spotty WiFi and even for short pipelines, it is highly recommended to run them from AWS. + +### 1. Start tmux + +[tmux](https://hackernoon.com/a-gentle-introduction-to-tmux-8d784c404340) is a "Terminal Multiplexer" that allows for commands to continue running even when you have closed your laptop. Start a new tmux session with `tmux new`; the `-s` flag names the session, and here we'll name it `nextflow`. + +``` +tmux new -s nextflow +``` + +Now you can run pipelines with abandon! + +### 2. 
Make a GitHub repo for your workflows (optional :) + + +To make sharing your pipelines and commands easy between your teammates, it's best to share code in a GitHub repository. One way is to store the commands in a Makefile ([example](https://github.com/czbiohub/kh-workflows/blob/master/nf-kmer-similarity/Makefile)) which can contain multiple `nextflow run` commands so that you don't need to remember the S3 bucket or output directory for every single one. [Makefiles](https://kbroman.org/minimal_make/) are broadly used in the software community for running many complex commands. Makefiles can have a lot of dependencies and be confusing, so we're only going to write *simple* Makefiles. + +``` +rnaseq: + nextflow run -profile czbiohub_aws nf-core/rnaseq \ + --reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \ + --genome GRCm38 \ + --outdir s3://olgabot-maca/nextflow-test/ + +human_mouse_zebrafish: + nextflow run czbiohub/nf-kmer-similarity -latest -profile aws \ + --samples s3://kmer-hashing/hematopoeisis/smartseq2/human_mouse_zebrafish/samples.csv + + +merkin2012_aws: + nextflow run czbiohub/nf-kmer-similarity -latest --sra "SRP016501" \ + -r olgabot/support-csv-directory-or-sra \ + -profile aws +``` + +In this example, one would run the `rnaseq` rule and the nextflow command beneath it with: + +``` +make rnaseq +``` + +If one wanted to run a different command, e.g. `human_mouse_zebrafish`, they would specify that command instead. For example: + +``` +make human_mouse_zebrafish +``` + +Makefiles are a very useful way of storing longer commands with short mnemonic words. + + +Once you [create a new repository](https://github.com/organizations/czbiohub/repositories/new) (best to initialize with a `.gitignore`, license - MIT and `README`), clone that repository to your EC2 instance. 
For example, if the repository is called `kh-workflows`, this is what the command would look like: + +``` +git clone https://github.com/czbiohub/kh-workflows +``` + +Now both create and edit a `Makefile`: + +``` +cd kh-workflows +nano Makefile +``` + +Write your rule with a colon after it; the command on the next line must be indented with a **tab**, not spaces. Once you're done, exit the program (the `^` command shown in nano means "Control"), write the file, add it to git, commit it, and push it up to GitHub. + + +``` +git add Makefile +git commit -m "Added makefile" +git push origin master +``` + + +### 3. Run your workflow!! + + +Remember to specify `-profile czbiohub_aws` to grab the CZ Biohub-specific AWS configurations, and an `--outdir` with an AWS S3 bucket so you don't run out of space on your small AMI. + +``` +nextflow run -profile czbiohub_aws nf-core/rnaseq \ + --reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \ + --genome GRCm38 \ + --outdir s3://olgabot-maca/nextflow-test/ +``` + +### 4. If you lose connection, how do you restart the jobs? + +If you close your laptop, get onto the train, or lose WiFi connection, you may lose connection to AWS and may need to restart the jobs. To reattach, use the command `tmux attach` and you should see your Nextflow output! To attach to a session by name, pass it with `-t`: + +``` +tmux attach -t nextflow +``` + +To restart the jobs from where you left off, add the `-resume` flag to your `nextflow` command: + + +``` +nextflow run -profile czbiohub_aws nf-core/rnaseq \ + --reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \ + --genome GRCm38 \ + --outdir s3://olgabot-maca/nextflow-test/ \ + -resume +``` + +It's important that this command be re-run from the same directory as there is a "hidden" `.nextflow` folder that contains all the metadata and information about previous runs. 
+ +## iGenomes specific configuration + +A local copy of the iGenomes resource has been made available on `s3://czbiohub-reference/igenomes` (in `us-west-2` region) so you should be able to run the pipeline against any reference available in the `igenomes.config` specific to the nf-core pipeline. + +You can do this by simply using the `--genome ` parameter. + +For Human and Mouse, we use [GENCODE](https://www.gencodegenes.org/) gene annotations. This doesn't change how you would specify the genome name, only that the pipelines run with the `czbiohub_aws` profile would be with GENCODE rather than iGenomes. + + +>NB: You will need an account to use the HPC cluster on PROFILE CLUSTER in order to run the pipeline. If in doubt contact IT. + +>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT. diff --git a/docs/genouest.md b/docs/genouest.md new file mode 100644 index 0000000..85341ef --- /dev/null +++ b/docs/genouest.md @@ -0,0 +1,38 @@ +# nf-core/configs: GenOuest Configuration + +All nf-core pipelines have been successfully configured for use on the GenOuest cluster. + +To use, run the pipeline with `-profile genouest`. This will download and launch the [`genouest.config`](../conf/genouest.config) which has been pre-configured with a setup suitable for the GenOuest cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + +## Running the workflow on the GenOuest cluster + +Nextflow is installed on the GenOuest cluster. Some documentation is available on the [GenOuest website](https://www.genouest.org/howto/#nextflow). 
+ +You need to activate it like this: + +```bash +source /local/env/envnextflow-19.07.0.sh +``` + +Nextflow manages each process as a separate job that is submitted to the cluster by using the sbatch command. +Nextflow shouldn't run directly on the submission node but on a compute node. Run nextflow from a compute node: + +```bash +# Login to a compute node +srun --pty bash + +# Load the dependencies if not done before +source /local/env/envnextflow-19.07.0.sh + +# Run a downloaded/git-cloned nextflow workflow from +nextflow run \ +/path/to/nf-core/workflow \ +-resume \ +-profile genouest \ +--email my-email@example.org \ +-c my-specific.config +... + +# Or use the nf-core client +nextflow run nf-core/rnaseq ... +``` diff --git a/docs/images/nfcore-configs_logo.png b/docs/images/nfcore-configs_logo.png new file mode 100755 index 0000000..eef0738 Binary files /dev/null and b/docs/images/nfcore-configs_logo.png differ diff --git a/docs/kraken.md b/docs/kraken.md new file mode 100644 index 0000000..9426087 --- /dev/null +++ b/docs/kraken.md @@ -0,0 +1,10 @@ +# nf-core/configs: KRAKEN Configuration + +This profile can be **only** combined with `jenkins.config`. It is used for +testing pipeline with real data on **in-house** cluster located at SciLifeLab. + +To use, run the pipeline with `-profile kraken`. This will download and launch +the [`kraken.config`](../conf/kraken.config) which has been pre-configured to +test the pipeline using `docker` by default. + +Example: `nextflow run -profile kraken,jenkins` diff --git a/docs/mendel.md b/docs/mendel.md index 3f3db20..ae0ad4c 100644 --- a/docs/mendel.md +++ b/docs/mendel.md @@ -2,27 +2,15 @@ All nf-core pipelines have been successfully configured for use on the MENDEL CLUSTER at the Gregor Mendel Institute (GMI). -To use, run the pipeline with `-profile conda,mendel`. This will download and launch the [`mendel.config`](../conf/mendel.config) which has been pre-configured with a setup suitable for the MENDEL cluster. 
A Conda environment will be created automatically and software dependencies will be downloaded from ['bioconda'](https://bioconda.github.io/). +To use, run the pipeline with `-profile conda,mendel`. This will download and launch the [`mendel.config`](../conf/mendel.config) which has been pre-configured with a setup suitable for the MENDEL cluster. A Conda environment will be created automatically and software dependencies will be resolved via [bioconda](https://bioconda.github.io/). -Theoretically, using `-profile singularity,mendel` would download a docker image containing all of the required software, and convert it to a Singularity image before execution of the pipeline. However, there is a regression in the Singularity deployment on MENDEL which renders containers downloaded from public repositories unusable because they lack the /lustre mountpoint. - -If you want to run the pipeline containerized anyway you will have to build the image yourself (on a machine where you have root access) using the provided `Singularity` file in the pipeline repository: - -```bash -cd /path/to/pipeline-repository -echo 'mkdir /lustre > Singularity' -singularity build nf-core-methylseq-custom.simg Singularity -``` - -After you copied the container image to the cluster filesystem, make sure to pass the path to the image to the pipeline with `-with-singularity /path/to/nf-core-methylseq-custom.simg` - -Before running the pipeline you will need to load Nextflow and Conda using the environment module system on MENDEL. You can do this by issuing the commands below: +Before running the pipeline you will need to load Nextflow and Conda using the environment module system on MENDEL. You can do this by issuing the commands below: ```bash ## Load Nextflow and Conda environment modules module purge module load Nextflow -module load Miniconda3 # not needed if using Singularity +module load Miniconda3/4.6.7 ``` >NB: You will need an account to use the HPC cluster in order to run the pipeline. 
If in doubt contact the HPC team. diff --git a/docs/munin.md b/docs/munin.md index 17233bb..167ff9b 100644 --- a/docs/munin.md +++ b/docs/munin.md @@ -1,15 +1,31 @@ # nf-core/configs: MUNIN Configuration -All nf-core pipelines have been successfully configured for use on the MUNIN cluster aka big iron. +All nf-core pipelines have been successfully configured for use on the MUNIN cluster. -To use, run the pipeline with `-profile munin`. This will download and launch the [`munin.config`](../conf/munin.config) which has been pre-configured with a setup suitable for the MUNIN cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. +## Usage + +To use, run the pipeline with `-profile munin`. + +This will download and launch the [`munin.config`](../conf/munin.config) which has been pre-configured with a setup suitable for the MUNIN cluster. Example: `nextflow run -profile munin` -## Docker +### Singularity + +This is the default behavior of this configuration profile. + +Using this profile, if no singularity image are available, one will be downloaded from dockerhub, and converted to a Singularity image before execution of the pipeline. + +It is also possible to specify the singularity profile: + +Example: `nextflow run -profile munin,singularity` + +### Docker It is also possible to execute the pipeline using Docker. +Using this profile, if no docker image are available, one will be downloaded from dockerhub before execution of the pipeline. 
+ Example: `nextflow run -profile munin,docker` ## Below are non-mandatory information on iGenomes specific configuration diff --git a/docs/pasteur.md b/docs/pasteur.md new file mode 100644 index 0000000..554761b --- /dev/null +++ b/docs/pasteur.md @@ -0,0 +1,56 @@ +# nf-core/configs: Institut Pasteur Configuration + +All nf-core pipelines have been successfully configured for use on the tars cluster at the Institut Pasteur. + +To use, run the pipeline with `-profile pasteur`. This will download and launch the [`pasteur.config`](../conf/pasteur.config) which has been pre-configured with a setup suitable for the Pasteur cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline. + + + +## Running the workflow on the Pasteur cluster + +Nextflow is not installed by default on the Pasteur cluster. +- Install Nextflow : [here](https://www.nextflow.io/docs/latest/getstarted.html#) + + +Nextflow manages each process as a separate job that is submitted to the cluster by using the sbatch command. +Nextflow shouldn't run directly on the submission node but on a compute node. +The compute nodes don't have access to internet so you need to run it offline. + +To do that: +1. Create a virtualenv to install nf-core +```bash +module purge +module load Python/3.6.0 +module load java +module load singularity +cd /path/to/nf-core/workflows +virtualenv .venv -p python3 +. .venv/bin/activate +``` +2. Install nf-core: [here](https://nf-co.re/tools#installation) +3. Get nf-core pipeline and container: [here](https://nf-co.re/tools#downloading-pipelines-for-offline-use) +4. Get the nf-core Pasteur profile: [here](https://github.com/nf-core/rnaseq/blob/master/docs/usage.md#--custom_config_base) +5. 
Run nextflow on a compute node: +```bash +# create a terminal +tmux + +# Get a compute node +salloc + +# Load the dependencies if not done before +module purge +module load java +module load singularity + +# Run nextflow workflow +nextflow run \\ +/path/to/pipeline-dir/from/step/3/workflow \\ +-resume \\ +-profile pasteur \\ +-with-singularity /path/to/pipeline-dir/from/step/3/singularity-images/singularity.img \\ +--email my-email@pasteur.fr \\ +--custom_config_base /path/to/configs/from/step/4/ \\ +-c my-specific.config +... +``` diff --git a/nfcore_custom.config b/nfcore_custom.config index 7374fa0..06182f5 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -12,22 +12,29 @@ params.custom_config_version = 'master' params.custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" profiles { + bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" } binac { includeConfig "${params.custom_config_base}/conf/binac.config" } + cbe { includeConfig "${params.custom_config_base}/conf/cbe.config" } ccga { includeConfig "${params.custom_config_base}/conf/ccga.config" } + ccga_dx { includeConfig "${params.custom_config_base}/conf/ccga_dx.config" } cfc { includeConfig "${params.custom_config_base}/conf/cfc.config" } crick { includeConfig "${params.custom_config_base}/conf/crick.config" } + czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" } + czbiohub_aws_highpriority { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config"; includeConfig "${params.custom_config_base}/conf/czbiohub_aws_highpriority.config" } + genouest { includeConfig "${params.custom_config_base}/conf/genouest.config" } gis { includeConfig "${params.custom_config_base}/conf/gis.config" } hebbe { includeConfig "${params.custom_config_base}/conf/hebbe.config" } + kraken { includeConfig "${params.custom_config_base}/conf/kraken.config" } mendel { includeConfig 
"${params.custom_config_base}/conf/mendel.config" } munin { includeConfig "${params.custom_config_base}/conf/munin.config" } + pasteur { includeConfig "${params.custom_config_base}/conf/pasteur.config" } phoenix { includeConfig "${params.custom_config_base}/conf/phoenix.config" } + prince { includeConfig "${params.custom_config_base}/conf/prince.config" } shh { includeConfig "${params.custom_config_base}/conf/shh.config" } uct_hex { includeConfig "${params.custom_config_base}/conf/uct_hex.config" } - uppmax_devel { includeConfig "${params.custom_config_base}/conf/uppmax.config"; includeConfig "${params.custom_config_base}/conf/uppmax-devel.config" } uppmax { includeConfig "${params.custom_config_base}/conf/uppmax.config" } + uppmax_devel { includeConfig "${params.custom_config_base}/conf/uppmax.config"; includeConfig "${params.custom_config_base}/conf/uppmax_devel.config" } uzh { includeConfig "${params.custom_config_base}/conf/uzh.config" } - prince { includeConfig "${params.custom_config_base}/conf/prince.config" } - bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" } } // If user hostnames contain one of these substring and they are @@ -37,6 +44,7 @@ params { // This is a groovy map, not a nextflow parameter set hostnames = [ crick: ['.thecrick.org'], + genouest: ['.genouest.org'], uppmax: ['.uppmax.uu.se'] ] }