1
0
Fork 0
mirror of https://github.com/MillironX/nf-configs.git synced 2024-11-22 00:26:03 +00:00

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Alexander Peltzer 2019-07-30 16:53:19 +02:00
commit 35c715ca3b
No known key found for this signature in database
GPG key ID: A4A9322B50AF95A0
21 changed files with 453 additions and 26 deletions

1
.gitignore vendored
View file

@ -3,3 +3,4 @@ work/
data/
results/
.DS_Store
*.code-workspace

View file

@ -1,6 +1,4 @@
<img src="docs/images/nf-core-logo.png" width="400">
# [nf-core/configs](https://github.com/nf-core/configs)
# ![nf-core/configs](docs/images/nfcore-configs_logo.png)
[![Build Status](https://travis-ci.org/nf-core/configs.svg?branch=master)](https://travis-ci.org/nf-core/configs)
@ -88,6 +86,7 @@ Currently documentation is available for the following clusters:
* [BINAC](docs/binac.md)
* [BIGPURPLE](docs/bigpurple.md)
* [CCGA](docs/ccga.md)
* [CCGA_DX](docs/ccga_dx.md)
* [CFC](docs/binac.md)
* [CRICK](docs/crick.md)
* [GIS](docs/gis.md)
@ -101,6 +100,7 @@ Currently documentation is available for the following clusters:
* [UPPMAX-DEVEL](docs/uppmax-devel.md)
* [UPPMAX](docs/uppmax.md)
* [UZH](docs/uzh.md)
* [PASTEUR](docs/pasteur.md)
### Uploading to `nf-core/configs`

View file

@ -10,7 +10,7 @@ singularity {
}
process {
beforeScript = 'module load devel/singularity/3.0.1'
beforeScript = 'module load devel/singularity/3.0.3'
executor = 'pbs'
queue = 'short'
}

View file

@ -2,15 +2,20 @@
params {
config_profile_description = 'CCGA cluster profile provided by nf-core/configs.'
config_profile_contact = 'Marc Hoeppner (@marchoeppner)'
config_profile_url = 'https://www.ikmb.uni-kiel.de/'
config_profile_url = 'https://www.ccga.uni-kiel.de/'
}
/*
* -------------------------------------------------
* Nextflow config file with environment modules for RZCluster in Kiel
* Nextflow config file for CCGA cluster in Kiel
* -------------------------------------------------
*/
singularity {
enabled = true
runOptions = "-B /ifs -B /scratch -B /work_beegfs"
}
executor {
queueSize=100
}
@ -29,4 +34,7 @@ params {
// illumina iGenomes reference file paths on RZCluster
igenomes_base = '/ifs/data/nfs_share/ikmb_repository/references/iGenomes/references/'
saveReference = true
max_memory = 128.GB
max_cpus = 16
max_time = 120.h
}

37
conf/ccga_dx.config Normal file
View file

@ -0,0 +1,37 @@
//Profile config names for nf-core/configs
params {
config_profile_description = 'CCGA DX cluster profile provided by nf-core/configs.'
config_profile_contact = 'Marc Hoeppner (@marchoeppner)'
config_profile_url = 'https://www.ccga.uni-kiel.de/'
}
/*
* -------------------------------------------------
* Nextflow config file for the CCGA DX cluster in Kiel
* -------------------------------------------------
*/
// Run processes inside Singularity containers
singularity {
enabled = true
}
// Limit on how many jobs the executor handles in parallel
executor {
queueSize=100
}
process {
// Global process config: every process is submitted via SLURM to the 'htc' queue
executor = 'slurm'
queue = 'htc'
}
params {
// illumina iGenomes reference file paths on DX Cluster
igenomes_base = '/mnt/ld_ng_out/sukmb352/references/iGenomes/references/'
saveReference = true
// Resource maxima advertised by this profile
// NOTE(review): these are plain params; presumably enforced by the pipeline, not by this file — confirm
max_memory = 250.GB
max_cpus = 20
max_time = 240.h
}

View file

@ -14,6 +14,11 @@ process {
executor = 'slurm'
}
weblog{
enabled = true
url = 'http://services.qbic.uni-tuebingen.de:8080/workflowservice/workflows'
}
params {
igenomes_base = '/nfsmounts/igenomes'
max_memory = 60.GB

129
conf/czbiohub_aws.config Normal file
View file

@ -0,0 +1,129 @@
/*
* -------------------------------------------------
* Nextflow config file for Chan Zuckerberg Biohub
* -------------------------------------------------
* AWS Batch profile: runs processes with Docker on the
* 'awsbatch' executor and defines reference genome
* paths (iGenomes base plus GENCODE-based human/mouse
* references) on S3.
* Included by the 'czbiohub_aws' profile.
*/
//Profile config names for nf-core/configs
params {
config_profile_description = 'Chan Zuckerberg Biohub AWS Batch profile provided by nf-core/configs.'
config_profile_contact = 'Olga Botvinnik (@olgabot)'
config_profile_url = 'https://www.czbiohub.org/'
}
// Containers are run with Docker (not Singularity) under this profile
docker {
enabled = true
}
process {
executor = 'awsbatch'
queue = 'default-971039e0-830c-11e9-9e0b-02c5b84a8036'
// 'ignore': a failing task is ignored instead of terminating the run
errorStrategy = 'ignore'
}
// Work directory for intermediate files lives in an S3 bucket
workDir = "s3://czb-nextflow/intermediates/"
aws.region = 'us-west-2'
// Location of the AWS CLI binary
// NOTE(review): presumably the path inside the Batch compute environment's AMI — confirm
executor.awscli = '/home/ec2-user/miniconda/bin/aws'
params.tracedir = './'
// Pipeline parameters for the CZ Biohub AWS profile: resource maxima,
// reference locations on S3, and per-genome / per-transgene file paths.
params {
saveReference = true
// Largest SPOT instances available on AWS: https://ec2instances.info/
max_memory = 1952.GB
max_cpus = 96
max_time = 240.h
// Compatible with multiple versions of rnaseq pipeline
// (the same value is set under both the snake_case and camelCase names)
seq_center = "czbiohub"
seqCenter = "czbiohub"
// illumina iGenomes reference file paths on CZ Biohub reference s3 bucket
// No final slash because it's added later
igenomes_base = "s3://czbiohub-reference/igenomes"
// GENCODE (human + mouse) reference file paths on CZ Biohub reference s3 bucket
// No final slash because it's added later
gencode_base = "s3://czbiohub-reference/gencode"
transgenes_base = "s3://czbiohub-reference/transgenes"
// AWS configurations
// NOTE(review): awsregion repeats the value of the top-level aws.region setting in this file
awsregion = "us-west-2"
awsqueue = "nextflow"
igenomesIgnore = true
// NOTE(review): the fc_* names look like featureCounts options of the rnaseq pipeline — confirm
fc_extra_attributes = 'gene_name'
fc_group_features = 'gene_id'
fc_group_features_type = 'gene_type'
// Regex: one or more underscores, then 'S', then one or more digits
trim_pattern = '_+S\\d+'
// GENCODE GTF and fasta files
genomes {
'GRCh38' {
fasta = "${params.gencode_base}/human/v30/GRCh38.p12.genome.ERCC92.fa"
gtf = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gtf"
transcript_fasta = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa"
star = "${params.gencode_base}/human/v30/STARIndex/"
salmon_index = "${params.gencode_base}/human/v30/salmon_index/"
}
// Unlike GRCh38, no salmon_index entry is defined for GRCm38
'GRCm38' {
fasta = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa"
gtf = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gtf"
transcript_fasta = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa"
star = "${params.gencode_base}/mouse/vM21/STARIndex/"
}
}
// Transgene references: one fasta + gtf pair per entry under transgenes_base
transgenes {
'ChR2' {
fasta = "${params.transgenes_base}/ChR2/ChR2.fa"
gtf = "${params.transgenes_base}/ChR2/ChR2.gtf"
}
'Cre' {
fasta = "${params.transgenes_base}/Cre/Cre.fa"
gtf = "${params.transgenes_base}/Cre/Cre.gtf"
}
// Key is 'ERCC' but the files live under ERCC92/
'ERCC' {
fasta = "${params.transgenes_base}/ERCC92/ERCC92.fa"
gtf = "${params.transgenes_base}/ERCC92/ERCC92.gtf"
}
'GCaMP6m' {
fasta = "${params.transgenes_base}/GCaMP6m/GCaMP6m.fa"
gtf = "${params.transgenes_base}/GCaMP6m/GCaMP6m.gtf"
}
// Key is 'GFP' but the path uses the mixed-case name 'Gfp'
'GFP' {
fasta = "${params.transgenes_base}/Gfp/Gfp.fa"
gtf = "${params.transgenes_base}/Gfp/Gfp.gtf"
}
'NpHR' {
fasta = "${params.transgenes_base}/NpHR/NpHR.fa"
gtf = "${params.transgenes_base}/NpHR/NpHR.gtf"
}
'RCaMP' {
fasta = "${params.transgenes_base}/RCaMP/RCaMP.fa"
gtf = "${params.transgenes_base}/RCaMP/RCaMP.gtf"
}
'RGECO' {
fasta = "${params.transgenes_base}/RGECO/RGECO.fa"
gtf = "${params.transgenes_base}/RGECO/RGECO.gtf"
}
'Tdtom' {
fasta = "${params.transgenes_base}/Tdtom/Tdtom.fa"
gtf = "${params.transgenes_base}/Tdtom/Tdtom.gtf"
}
// Key is 'Car-T' but the path uses the lowercase name 'car-t'
'Car-T' {
fasta = "${params.transgenes_base}/car-t/car-t.fa"
gtf = "${params.transgenes_base}/car-t/car-t.gtf"
}
'zsGreen' {
fasta = "${params.transgenes_base}/zsGreen/zsGreen.fa"
gtf = "${params.transgenes_base}/zsGreen/zsGreen.gtf"
}
}
}

View file

@ -0,0 +1,12 @@
/*
* -------------------------------------------------
* Nextflow config file for Chan Zuckerberg Biohub
* -------------------------------------------------
* Only overrides the process queue with the
* 'highpriority-...' queue; it sets nothing else.
* The 'czbiohub_aws_highpriority' profile includes it
* after czbiohub_aws.config.
*/
process {
queue = 'highpriority-971039e0-830c-11e9-9e0b-02c5b84a8036'
}

View file

@ -10,7 +10,7 @@ manifest {
}
process {
beforeScript = {'module load Singularity; module load Miniconda3'}
beforeScript = 'module load Miniconda3/4.6.7'
executor = 'pbspro'
clusterOptions = { "-P $params.project" }
}

View file

@ -28,5 +28,5 @@ params {
max_cpus = 16
max_time = 72.h
// illumina iGenomes reference file paths on UPPMAX
igenomes_base = '/data0/btb/references/igenomes/'
igenomes_base = '/data1/references/igenomes/'
}

23
conf/pasteur.config Normal file
View file

@ -0,0 +1,23 @@
//Profile config names for nf-core/configs
params {
config_profile_description = 'The Institut Pasteur HPC cluster profile'
config_profile_contact = 'Remi Planel (@rplanel)'
config_profile_url = 'https://research.pasteur.fr/en/service/tars-cluster'
}
// Run processes inside Singularity containers
singularity {
enabled = true
autoMounts = true
// Bind host /local/scratch to /tmp inside the container
runOptions = '-B /local/scratch:/tmp'
}
// Submit every process through SLURM
process {
executor = 'slurm'
}
params {
// NOTE(review): presumably tells the pipeline not to load its iGenomes config — confirm
igenomesIgnore = true
// Resource maxima advertised by this profile
max_memory = 256.GB
max_cpus = 28
max_time = 24.h
}

View file

@ -1,4 +1,6 @@
singularityDir = "$SCRATCH/singularity_images_nextflow"
singularityModule = "singularity/3.2.1"
squashfsModule = "squashfs/4.3"
params {
config_profile_description = """
@ -17,8 +19,8 @@ singularity {
process {
beforeScript = """
module load singularity/3.1.0
module load squashfs/4.3
module load $singularityModule
module load $squashfsModule
"""
.stripIndent()
executor = 'slurm'

View file

@ -8,12 +8,16 @@ params {
singularity {
enabled = true
autoMounts = true
runOptions = '-B /run/shm:/run/shm'
cacheDir = "/projects1/singularity_scratch/cache/"
}
process {
executor = 'slurm'
queue = 'short'
}
executor {
queueSize = 16
}

18
docs/ccga.md Normal file
View file

@ -0,0 +1,18 @@
# nf-core/configs: CCGA Configuration
Deployment and testing of nf-core pipelines at the CCGA cluster is on-going.
To use, run the pipeline with `-profile ccga`. This will download and launch the [`ccga.config`](../conf/ccga.config) which has been pre-configured with a setup suitable for the CCGA cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on the cluster. You can do this by issuing the commands below:
```bash
## Load Nextflow and Singularity environment modules
module purge
module load IKMB
module load Java/1.8.0
module load Nextflow
module load singularity3.1.0
```
>NB: Access to the CCGA cluster is restricted to IKMB/CCGA employees. Please talk to Marc Hoeppner to get access (@marchoeppner).

9
docs/ccga_dx.md Normal file
View file

@ -0,0 +1,9 @@
# nf-core/configs: CCGA DX Configuration
Deployment and testing of nf-core pipelines at the CCGA DX cluster is on-going.
To use, run the pipeline with `-profile ccga_dx`. This will download and launch the [`ccga_dx.config`](../conf/ccga_dx.config) which has been pre-configured with a setup suitable for the CCGA DX cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
Before running the pipeline you will need to have Nextflow installed.
>NB: Access to the CCGA DX cluster is restricted to IKMB/CCGA employees. Please talk to Marc Hoeppner to get access (@marchoeppner).

View file

@ -10,7 +10,7 @@ Before running the pipeline you will need to load Nextflow and Singularity using
## Load Nextflow and Singularity environment modules
module purge
module load devel/java_jdk/1.8.0u121
module load qbic/singularity_slurm/3.0.1
module load qbic/singularity_slurm/3.0.3
```

128
docs/czbiohub.md Normal file
View file

@ -0,0 +1,128 @@
# nf-core/configs: CZ Biohub Configuration
All nf-core pipelines have been successfully configured for use on the AWS Batch at the Chan Zuckerberg Biohub here.
To use, run the pipeline with `-profile czbiohub_aws`. This will download and launch the [`czbiohub_aws.config`](../conf/czbiohub_aws.config) which has been pre-configured with a setup suitable for the AWS Batch. Using this profile, a docker image containing all of the required software will be downloaded and used to run each process on AWS Batch.
Ask Olga (olga.botvinnik@czbiohub.org) if you have any questions!
## Run the pipeline from a small AWS EC2 Instance
The pipeline will monitor and submit jobs to AWS Batch on your behalf. To ensure that the pipeline is successful, it will need to be run from a computer that has constant internet connection. Unfortunately for us, Biohub has spotty WiFi and even for short pipelines, it is highly recommended to run them from AWS.
### 1. Start tmux
[tmux](https://hackernoon.com/a-gentle-introduction-to-tmux-8d784c404340) is a "Terminal Multiplexer" that allows for commands to continue running even when you have closed your laptop. Start a new tmux session with `tmux new` and we'll name this session `nextflow`.
```
tmux new -s nextflow
```
Now you can run pipelines with abandon!
### 2. Make a GitHub repo for your workflows (optional :)
To make sharing your pipelines and commands easy between your teammates, it's best to share code in a GitHub repository. One way is to store the commands in a Makefile ([example](https://github.com/czbiohub/kh-workflows/blob/master/nf-kmer-similarity/Makefile)) which can contain multiple `nextflow run` commands so that you don't need to remember the S3 bucket or output directory for every single one. [Makefiles](https://kbroman.org/minimal_make/) are broadly used in the software community for running many complex commands. Makefiles can have a lot of dependencies and be confusing, so we're only going to write *simple* Makefiles.
```
rnaseq:
nextflow run -profile czbiohub_aws nf-core/rnaseq \
--reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
--genome GRCm38 \
--outdir s3://olgabot-maca/nextflow-test/
human_mouse_zebrafish:
nextflow run czbiohub/nf-kmer-similarity -latest -profile aws \
--samples s3://kmer-hashing/hematopoeisis/smartseq2/human_mouse_zebrafish/samples.csv
merkin2012_aws:
nextflow run czbiohub/nf-kmer-similarity -latest --sra "SRP016501" \
-r olgabot/support-csv-directory-or-sra \
-profile aws
```
In this example, one would run the `rnaseq` rule and the nextflow command beneath it with:
```
make rnaseq
```
If one wanted to run a different command, e.g. `human_mouse_zebrafish`, they would specify that command instead. For example:
```
make human_mouse_zebrafish
```
Makefiles are a very useful way of storing longer commands with short mnemonic words.
Once you [create a new repository](https://github.com/organizations/czbiohub/repositories/new) (best to initialize with a `.gitignore`, license - MIT and `README`), clone that repository to your EC2 instance. For example, if the repository is called `kh-workflows`, this is what the command would look like:
```
git clone https://github.com/czbiohub/kh-workflows
```
Now both create and edit a `Makefile`:
```
cd kh-workflows
nano Makefile
```
Write your rule with a colon after it, and indent the command on the next line with a **tab**, not spaces. Once you're done, exit the program (the `^` command shown in nano means "Control"), write the file, add it to git, commit it, and push it up to GitHub.
```
git add Makefile
git commit -m "Added makefile"
git push origin master
```
### 3. Run your workflow!!
Remember to specify `-profile czbiohub_aws` to grab the CZ Biohub-specific AWS configurations, and an `--outdir` with an AWS S3 bucket so you don't run out of space on your small AMI
```
nextflow run -profile czbiohub_aws nf-core/rnaseq \
--reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
--genome GRCm38 \
--outdir s3://olgabot-maca/nextflow-test/
```
### 4. If you lose connection, how do you restart the jobs?
If you close your laptop, get onto the train, or lose WiFi connection, you may lose connection to AWS and may need to restart the jobs. To reattach, use the command `tmux attach` and you should see your Nextflow output! To get the named session, use:
```
tmux attach -t nextflow
```
To restart the jobs from where you left off, add the `-resume` flag to your `nextflow` command:
```
nextflow run -profile czbiohub_aws nf-core/rnaseq \
--reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
--genome GRCm38 \
--outdir s3://olgabot-maca/nextflow-test/ \
-resume
```
It's important that this command be re-run from the same directory as there is a "hidden" `.nextflow` folder that contains all the metadata and information about previous runs.
## iGenomes specific configuration
A local copy of the iGenomes resource has been made available on `s3://czbiohub-reference/igenomes` (in `us-west-2` region) so you should be able to run the pipeline against any reference available in the `igenomes.config` specific to the nf-core pipeline.
You can do this by simply using the `--genome <GENOME_ID>` parameter.
For Human and Mouse, we use [GENCODE](https://www.gencodegenes.org/) gene annotations. This doesn't change how you would specify the genome name, only that the pipelines run with the `czbiohub_aws` profile would be with GENCODE rather than iGenomes.
>NB: You will need an account with access to the CZ Biohub AWS Batch environment in order to run the pipeline. If in doubt contact IT.
>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT.

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

View file

@ -2,27 +2,15 @@
All nf-core pipelines have been successfully configured for use on the MENDEL CLUSTER at the Gregor Mendel Institute (GMI).
To use, run the pipeline with `-profile conda,mendel`. This will download and launch the [`mendel.config`](../conf/mendel.config) which has been pre-configured with a setup suitable for the MENDEL cluster. A Conda environment will be created automatically and software dependencies will be downloaded from ['bioconda'](https://bioconda.github.io/).
To use, run the pipeline with `-profile conda,mendel`. This will download and launch the [`mendel.config`](../conf/mendel.config) which has been pre-configured with a setup suitable for the MENDEL cluster. A Conda environment will be created automatically and software dependencies will be resolved via [bioconda](https://bioconda.github.io/).
Theoretically, using `-profile singularity,mendel` would download a docker image containing all of the required software, and convert it to a Singularity image before execution of the pipeline. However, there is a regression in the Singularity deployment on MENDEL which renders containers downloaded from public repositories unusable because they lack the /lustre mountpoint.
If you want to run the pipeline containerized anyway you will have to build the image yourself (on a machine where you have root access) using the provided `Singularity` file in the pipeline repository:
```bash
cd /path/to/pipeline-repository
echo 'mkdir /lustre > Singularity'
singularity build nf-core-methylseq-custom.simg Singularity
```
After you copied the container image to the cluster filesystem, make sure to pass the path to the image to the pipeline with `-with-singularity /path/to/nf-core-methylseq-custom.simg`
Before running the pipeline you will need to load Nextflow and Conda using the environment module system on MENDEL. You can do this by issuing the commands below:
Before running the pipeline you will need to load Conda using the environment module system on MENDEL. You can do this by issuing the commands below:
```bash
## Load Nextflow and Conda environment modules
module purge
module load Nextflow
module load Miniconda3 # not needed if using Singularity
module load Miniconda3/4.6.7
```
>NB: You will need an account to use the HPC cluster in order to run the pipeline. If in doubt contact the HPC team.

56
docs/pasteur.md Normal file
View file

@ -0,0 +1,56 @@
# nf-core/configs: Institut Pasteur Configuration
All nf-core pipelines have been successfully configured for use on the tars cluster at the Institut Pasteur.
To use, run the pipeline with `-profile pasteur`. This will download and launch the [`pasteur.config`](../conf/pasteur.config) which has been pre-configured with a setup suitable for the Pasteur cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
## Running the workflow on the Pasteur cluster
Nextflow is not installed by default on the Pasteur cluster.
- Install Nextflow : [here](https://www.nextflow.io/docs/latest/getstarted.html#)
Nextflow manages each process as a separate job that is submitted to the cluster by using the sbatch command.
Nextflow shouldn't run directly on the submission node but on a compute node.
The compute nodes don't have access to the internet so you need to run it offline.
To do that:
1. Create a virtualenv to install nf-core
```bash
module purge
module load Python/3.6.0
module load java
module load singularity
cd /path/to/nf-core/workflows
virtualenv .venv -p python3
. .venv/bin/activate
```
2. Install nf-core: [here](https://nf-co.re/tools#installation)
3. Get nf-core pipeline and container: [here](https://nf-co.re/tools#downloading-pipelines-for-offline-use)
4. Get the nf-core Pasteur profile: [here](https://github.com/nf-core/rnaseq/blob/master/docs/usage.md#--custom_config_base)
5. Run nextflow on a compute node:
```bash
# create a terminal
tmux
# Get a compute node
salloc
# Load the dependencies if not done before
module purge
module load java
module load singularity
# Run nextflow workflow
nextflow run \
/path/to/pipeline-dir/from/step/3/workflow \
-resume \
-profile pasteur \
-with-singularity /path/to/pipeline-dir/from/step/3/singularity-images/singularity.img \
--email my-email@pasteur.fr \
--custom_config_base /path/to/configs/from/step/4/ \
-c my-specific.config
...
```

View file

@ -15,8 +15,14 @@ profiles {
aquila { includeConfig "${params.custom_config_base}/conf/aquila.config" }
binac { includeConfig "${params.custom_config_base}/conf/binac.config" }
ccga { includeConfig "${params.custom_config_base}/conf/ccga.config" }
ccga_dx { includeConfig "${params.custom_config_base}/conf/ccga_dx.config" }
cfc { includeConfig "${params.custom_config_base}/conf/cfc.config" }
crick { includeConfig "${params.custom_config_base}/conf/crick.config" }
czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
czbiohub_aws_highpriority {
includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config"
includeConfig "${params.custom_config_base}/conf/czbiohub_aws_highpriority.config"
}
gis { includeConfig "${params.custom_config_base}/conf/gis.config" }
hebbe { includeConfig "${params.custom_config_base}/conf/hebbe.config" }
mendel { includeConfig "${params.custom_config_base}/conf/mendel.config" }
@ -29,6 +35,7 @@ profiles {
uzh { includeConfig "${params.custom_config_base}/conf/uzh.config" }
prince { includeConfig "${params.custom_config_base}/conf/prince.config" }
bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" }
pasteur { includeConfig "${params.custom_config_base}/conf/pasteur.config" }
}
// If user hostnames contain one of these substring and they are