mirror of
https://github.com/MillironX/nf-configs.git
synced 2024-11-26 01:39:55 +00:00
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
35c715ca3b
21 changed files with 453 additions and 26 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -3,3 +3,4 @@ work/
|
||||||
data/
|
data/
|
||||||
results/
|
results/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
*.code-workspace
|
|
@ -1,6 +1,4 @@
|
||||||
<img src="docs/images/nf-core-logo.png" width="400">
|
# ![nf-core/configs](docs/images/nfcore-configs_logo.png)
|
||||||
|
|
||||||
# [nf-core/configs](https://github.com/nf-core/configs)
|
|
||||||
|
|
||||||
[![Build Status](https://travis-ci.org/nf-core/configs.svg?branch=master)](https://travis-ci.org/nf-core/configs)
|
[![Build Status](https://travis-ci.org/nf-core/configs.svg?branch=master)](https://travis-ci.org/nf-core/configs)
|
||||||
|
|
||||||
|
@ -88,6 +86,7 @@ Currently documentation is available for the following clusters:
|
||||||
* [BINAC](docs/binac.md)
|
* [BINAC](docs/binac.md)
|
||||||
* [BIGPURPLE](docs/bigpurple.md)
|
* [BIGPURPLE](docs/bigpurple.md)
|
||||||
* [CCGA](docs/ccga.md)
|
* [CCGA](docs/ccga.md)
|
||||||
|
* [CCGA_DX](docs/ccga_dx.md)
|
||||||
* [CFC](docs/cfc.md)
|
* [CFC](docs/cfc.md)
|
||||||
* [CRICK](docs/crick.md)
|
* [CRICK](docs/crick.md)
|
||||||
* [GIS](docs/gis.md)
|
* [GIS](docs/gis.md)
|
||||||
|
@ -101,6 +100,7 @@ Currently documentation is available for the following clusters:
|
||||||
* [UPPMAX-DEVEL](docs/uppmax-devel.md)
|
* [UPPMAX-DEVEL](docs/uppmax-devel.md)
|
||||||
* [UPPMAX](docs/uppmax.md)
|
* [UPPMAX](docs/uppmax.md)
|
||||||
* [UZH](docs/uzh.md)
|
* [UZH](docs/uzh.md)
|
||||||
|
* [PASTEUR](docs/pasteur.md)
|
||||||
|
|
||||||
### Uploading to `nf-core/configs`
|
### Uploading to `nf-core/configs`
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ singularity {
|
||||||
}
|
}
|
||||||
|
|
||||||
process {
|
process {
|
||||||
beforeScript = 'module load devel/singularity/3.0.1'
|
beforeScript = 'module load devel/singularity/3.0.3'
|
||||||
executor = 'pbs'
|
executor = 'pbs'
|
||||||
queue = 'short'
|
queue = 'short'
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,15 +2,20 @@
|
||||||
params {
|
params {
|
||||||
config_profile_description = 'CCGA cluster profile provided by nf-core/configs.'
|
config_profile_description = 'CCGA cluster profile provided by nf-core/configs.'
|
||||||
config_profile_contact = 'Marc Hoeppner (@marchoeppner)'
|
config_profile_contact = 'Marc Hoeppner (@marchoeppner)'
|
||||||
config_profile_url = 'https://www.ikmb.uni-kiel.de/'
|
config_profile_url = 'https://www.ccga.uni-kiel.de/'
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* -------------------------------------------------
|
* -------------------------------------------------
|
||||||
* Nextflow config file with environment modules for RZCluster in Kiel
|
* Nextflow config file for CCGA cluster in Kiel
|
||||||
* -------------------------------------------------
|
* -------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
singularity {
|
||||||
|
enabled = true
|
||||||
|
runOptions = "-B /ifs -B /scratch -B /work_beegfs"
|
||||||
|
}
|
||||||
|
|
||||||
executor {
|
executor {
|
||||||
queueSize=100
|
queueSize=100
|
||||||
}
|
}
|
||||||
|
@ -29,4 +34,7 @@ params {
|
||||||
// illumina iGenomes reference file paths on RZCluster
|
// illumina iGenomes reference file paths on RZCluster
|
||||||
igenomes_base = '/ifs/data/nfs_share/ikmb_repository/references/iGenomes/references/'
|
igenomes_base = '/ifs/data/nfs_share/ikmb_repository/references/iGenomes/references/'
|
||||||
saveReference = true
|
saveReference = true
|
||||||
|
max_memory = 128.GB
|
||||||
|
max_cpus = 16
|
||||||
|
max_time = 120.h
|
||||||
}
|
}
|
||||||
|
|
37
conf/ccga_dx.config
Normal file
37
conf/ccga_dx.config
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
//Profile config names for nf-core/configs
|
||||||
|
params {
|
||||||
|
config_profile_description = 'CCGA DX cluster profile provided by nf-core/configs.'
|
||||||
|
config_profile_contact = 'Marc Hoeppner (@marchoeppner)'
|
||||||
|
config_profile_url = 'https://www.ccga.uni-kiel.de/'
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* -------------------------------------------------
|
||||||
|
* Nextflow config file for CCGA DX cluster in Kiel
|
||||||
|
* -------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
singularity {
|
||||||
|
enabled = true
|
||||||
|
}
|
||||||
|
|
||||||
|
executor {
|
||||||
|
queueSize=100
|
||||||
|
}
|
||||||
|
|
||||||
|
process {
|
||||||
|
|
||||||
|
// Global process config
|
||||||
|
executor = 'slurm'
|
||||||
|
queue = 'htc'
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
params {
|
||||||
|
// illumina iGenomes reference file paths on DX Cluster
|
||||||
|
igenomes_base = '/mnt/ld_ng_out/sukmb352/references/iGenomes/references/'
|
||||||
|
saveReference = true
|
||||||
|
max_memory = 250.GB
|
||||||
|
max_cpus = 20
|
||||||
|
max_time = 240.h
|
||||||
|
}
|
|
@ -14,6 +14,11 @@ process {
|
||||||
executor = 'slurm'
|
executor = 'slurm'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
weblog{
|
||||||
|
enabled = true
|
||||||
|
url = 'http://services.qbic.uni-tuebingen.de:8080/workflowservice/workflows'
|
||||||
|
}
|
||||||
|
|
||||||
params {
|
params {
|
||||||
igenomes_base = '/nfsmounts/igenomes'
|
igenomes_base = '/nfsmounts/igenomes'
|
||||||
max_memory = 60.GB
|
max_memory = 60.GB
|
||||||
|
|
129
conf/czbiohub_aws.config
Normal file
129
conf/czbiohub_aws.config
Normal file
|
@ -0,0 +1,129 @@
|
||||||
|
/*
|
||||||
|
* -------------------------------------------------
|
||||||
|
* Nextflow config file for Chan Zuckerberg Biohub
|
||||||
|
* -------------------------------------------------
|
||||||
|
* Defines reference genomes, using iGenome paths
|
||||||
|
* Imported under the default 'standard' Nextflow
|
||||||
|
* profile in nextflow.config
|
||||||
|
*/
|
||||||
|
|
||||||
|
//Profile config names for nf-core/configs
|
||||||
|
params {
|
||||||
|
config_profile_description = 'Chan Zuckerberg Biohub AWS Batch profile provided by nf-core/configs.'
|
||||||
|
config_profile_contact = 'Olga Botvinnik (@olgabot)'
|
||||||
|
config_profile_url = 'https://www.czbiohub.org/'
|
||||||
|
}
|
||||||
|
|
||||||
|
docker {
|
||||||
|
enabled = true
|
||||||
|
}
|
||||||
|
|
||||||
|
process {
|
||||||
|
executor = 'awsbatch'
|
||||||
|
queue = 'default-971039e0-830c-11e9-9e0b-02c5b84a8036'
|
||||||
|
errorStrategy = 'ignore'
|
||||||
|
}
|
||||||
|
|
||||||
|
workDir = "s3://czb-nextflow/intermediates/"
|
||||||
|
|
||||||
|
aws.region = 'us-west-2'
|
||||||
|
executor.awscli = '/home/ec2-user/miniconda/bin/aws'
|
||||||
|
params.tracedir = './'
|
||||||
|
|
||||||
|
params {
|
||||||
|
saveReference = true
|
||||||
|
|
||||||
|
// Largest SPOT instances available on AWS: https://ec2instances.info/
|
||||||
|
max_memory = 1952.GB
|
||||||
|
max_cpus = 96
|
||||||
|
max_time = 240.h
|
||||||
|
|
||||||
|
// Compatible with multiple versions of rnaseq pipeline
|
||||||
|
seq_center = "czbiohub"
|
||||||
|
seqCenter = "czbiohub"
|
||||||
|
|
||||||
|
// illumina iGenomes reference file paths on CZ Biohub reference s3 bucket
|
||||||
|
// No final slash because it's added later
|
||||||
|
igenomes_base = "s3://czbiohub-reference/igenomes"
|
||||||
|
|
||||||
|
// GENCODE (human + mouse) reference file paths on CZ Biohub reference s3 bucket
|
||||||
|
// No final slash because it's added later
|
||||||
|
gencode_base = "s3://czbiohub-reference/gencode"
|
||||||
|
transgenes_base = "s3://czbiohub-reference/transgenes"
|
||||||
|
|
||||||
|
// AWS configurations
|
||||||
|
awsregion = "us-west-2"
|
||||||
|
awsqueue = "nextflow"
|
||||||
|
|
||||||
|
igenomesIgnore = true
|
||||||
|
|
||||||
|
fc_extra_attributes = 'gene_name'
|
||||||
|
fc_group_features = 'gene_id'
|
||||||
|
fc_group_features_type = 'gene_type'
|
||||||
|
|
||||||
|
trim_pattern = '_+S\\d+'
|
||||||
|
|
||||||
|
// GENCODE GTF and fasta files
|
||||||
|
genomes {
|
||||||
|
'GRCh38' {
|
||||||
|
fasta = "${params.gencode_base}/human/v30/GRCh38.p12.genome.ERCC92.fa"
|
||||||
|
gtf = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gtf"
|
||||||
|
transcript_fasta = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa"
|
||||||
|
star = "${params.gencode_base}/human/v30/STARIndex/"
|
||||||
|
salmon_index = "${params.gencode_base}/human/v30/salmon_index/"
|
||||||
|
}
|
||||||
|
'GRCm38' {
|
||||||
|
fasta = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa"
|
||||||
|
gtf = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gtf"
|
||||||
|
transcript_fasta = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa"
|
||||||
|
star = "${params.gencode_base}/mouse/vM21/STARIndex/"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
transgenes {
|
||||||
|
'ChR2' {
|
||||||
|
fasta = "${params.transgenes_base}/ChR2/ChR2.fa"
|
||||||
|
gtf = "${params.transgenes_base}/ChR2/ChR2.gtf"
|
||||||
|
}
|
||||||
|
'Cre' {
|
||||||
|
fasta = "${params.transgenes_base}/Cre/Cre.fa"
|
||||||
|
gtf = "${params.transgenes_base}/Cre/Cre.gtf"
|
||||||
|
}
|
||||||
|
'ERCC' {
|
||||||
|
fasta = "${params.transgenes_base}/ERCC92/ERCC92.fa"
|
||||||
|
gtf = "${params.transgenes_base}/ERCC92/ERCC92.gtf"
|
||||||
|
}
|
||||||
|
'GCaMP6m' {
|
||||||
|
fasta = "${params.transgenes_base}/GCaMP6m/GCaMP6m.fa"
|
||||||
|
gtf = "${params.transgenes_base}/GCaMP6m/GCaMP6m.gtf"
|
||||||
|
}
|
||||||
|
'GFP' {
|
||||||
|
fasta = "${params.transgenes_base}/Gfp/Gfp.fa"
|
||||||
|
gtf = "${params.transgenes_base}/Gfp/Gfp.gtf"
|
||||||
|
}
|
||||||
|
'NpHR' {
|
||||||
|
fasta = "${params.transgenes_base}/NpHR/NpHR.fa"
|
||||||
|
gtf = "${params.transgenes_base}/NpHR/NpHR.gtf"
|
||||||
|
}
|
||||||
|
'RCaMP' {
|
||||||
|
fasta = "${params.transgenes_base}/RCaMP/RCaMP.fa"
|
||||||
|
gtf = "${params.transgenes_base}/RCaMP/RCaMP.gtf"
|
||||||
|
}
|
||||||
|
'RGECO' {
|
||||||
|
fasta = "${params.transgenes_base}/RGECO/RGECO.fa"
|
||||||
|
gtf = "${params.transgenes_base}/RGECO/RGECO.gtf"
|
||||||
|
}
|
||||||
|
'Tdtom' {
|
||||||
|
fasta = "${params.transgenes_base}/Tdtom/Tdtom.fa"
|
||||||
|
gtf = "${params.transgenes_base}/Tdtom/Tdtom.gtf"
|
||||||
|
}
|
||||||
|
'Car-T' {
|
||||||
|
fasta = "${params.transgenes_base}/car-t/car-t.fa"
|
||||||
|
gtf = "${params.transgenes_base}/car-t/car-t.gtf"
|
||||||
|
}
|
||||||
|
'zsGreen' {
|
||||||
|
fasta = "${params.transgenes_base}/zsGreen/zsGreen.fa"
|
||||||
|
gtf = "${params.transgenes_base}/zsGreen/zsGreen.gtf"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
12
conf/czbiohub_aws_highpriority.config
Normal file
12
conf/czbiohub_aws_highpriority.config
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
/*
|
||||||
|
* -------------------------------------------------
|
||||||
|
* Nextflow config file for Chan Zuckerberg Biohub
|
||||||
|
* -------------------------------------------------
|
||||||
|
* Defines reference genomes, using iGenome paths
|
||||||
|
* Imported under the default 'standard' Nextflow
|
||||||
|
* profile in nextflow.config
|
||||||
|
*/
|
||||||
|
|
||||||
|
process {
|
||||||
|
queue = 'highpriority-971039e0-830c-11e9-9e0b-02c5b84a8036'
|
||||||
|
}
|
|
@ -10,7 +10,7 @@ manifest {
|
||||||
}
|
}
|
||||||
|
|
||||||
process {
|
process {
|
||||||
beforeScript = {'module load Singularity; module load Miniconda3'}
|
beforeScript = 'module load Miniconda3/4.6.7'
|
||||||
executor = 'pbspro'
|
executor = 'pbspro'
|
||||||
clusterOptions = { "-P $params.project" }
|
clusterOptions = { "-P $params.project" }
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,5 +28,5 @@ params {
|
||||||
max_cpus = 16
|
max_cpus = 16
|
||||||
max_time = 72.h
|
max_time = 72.h
|
||||||
// illumina iGenomes reference file paths on UPPMAX
|
// illumina iGenomes reference file paths on UPPMAX
|
||||||
igenomes_base = '/data0/btb/references/igenomes/'
|
igenomes_base = '/data1/references/igenomes/'
|
||||||
}
|
}
|
||||||
|
|
23
conf/pasteur.config
Normal file
23
conf/pasteur.config
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
//Profile config names for nf-core/configs
|
||||||
|
params {
|
||||||
|
config_profile_description = 'The Institut Pasteur HPC cluster profile'
|
||||||
|
config_profile_contact = 'Remi Planel (@rplanel)'
|
||||||
|
config_profile_url = 'https://research.pasteur.fr/en/service/tars-cluster'
|
||||||
|
}
|
||||||
|
|
||||||
|
singularity {
|
||||||
|
enabled = true
|
||||||
|
autoMounts = true
|
||||||
|
runOptions = '-B /local/scratch:/tmp'
|
||||||
|
}
|
||||||
|
|
||||||
|
process {
|
||||||
|
executor = 'slurm'
|
||||||
|
}
|
||||||
|
|
||||||
|
params {
|
||||||
|
igenomesIgnore = true
|
||||||
|
max_memory = 256.GB
|
||||||
|
max_cpus = 28
|
||||||
|
max_time = 24.h
|
||||||
|
}
|
|
@ -1,4 +1,6 @@
|
||||||
singularityDir = "$SCRATCH/singularity_images_nextflow"
|
singularityDir = "$SCRATCH/singularity_images_nextflow"
|
||||||
|
singularityModule = "singularity/3.2.1"
|
||||||
|
squashfsModule = "squashfs/4.3"
|
||||||
|
|
||||||
params {
|
params {
|
||||||
config_profile_description = """
|
config_profile_description = """
|
||||||
|
@ -17,8 +19,8 @@ singularity {
|
||||||
|
|
||||||
process {
|
process {
|
||||||
beforeScript = """
|
beforeScript = """
|
||||||
module load singularity/3.1.0
|
module load $singularityModule
|
||||||
module load squashfs/4.3
|
module load $squashfsModule
|
||||||
"""
|
"""
|
||||||
.stripIndent()
|
.stripIndent()
|
||||||
executor = 'slurm'
|
executor = 'slurm'
|
||||||
|
|
|
@ -8,12 +8,16 @@ params {
|
||||||
singularity {
|
singularity {
|
||||||
enabled = true
|
enabled = true
|
||||||
autoMounts = true
|
autoMounts = true
|
||||||
|
runOptions = '-B /run/shm:/run/shm'
|
||||||
cacheDir = "/projects1/singularity_scratch/cache/"
|
cacheDir = "/projects1/singularity_scratch/cache/"
|
||||||
}
|
}
|
||||||
|
|
||||||
process {
|
process {
|
||||||
executor = 'slurm'
|
executor = 'slurm'
|
||||||
queue = 'short'
|
queue = 'short'
|
||||||
|
}
|
||||||
|
|
||||||
|
executor {
|
||||||
queueSize = 16
|
queueSize = 16
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
18
docs/ccga.md
Normal file
18
docs/ccga.md
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# nf-core/configs: CCGA Configuration
|
||||||
|
|
||||||
|
Deployment and testing of nf-core pipelines at the CCGA cluster is on-going.
|
||||||
|
|
||||||
|
To use, run the pipeline with `-profile ccga`. This will download and launch the [`ccga.config`](../conf/ccga.config) which has been pre-configured with a setup suitable for the CCGA cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
|
||||||
|
|
||||||
|
Before running the pipeline you will need to load Nextflow and Singularity using the environment module system on the cluster. You can do this by issuing the commands below:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
## Load Nextflow and Singularity environment modules
|
||||||
|
module purge
|
||||||
|
module load IKMB
|
||||||
|
module load Java/1.8.0
|
||||||
|
module load Nextflow
|
||||||
|
module load singularity3.1.0
|
||||||
|
```
|
||||||
|
|
||||||
|
>NB: Access to the CCGA cluster is restricted to IKMB/CCGA employees. Please talk to Marc Hoeppner to get access (@marchoeppner).
|
9
docs/ccga_dx.md
Normal file
9
docs/ccga_dx.md
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# nf-core/configs: CCGA DX Configuration
|
||||||
|
|
||||||
|
Deployment and testing of nf-core pipelines at the CCGA DX cluster is on-going.
|
||||||
|
|
||||||
|
To use, run the pipeline with `-profile ccga_dx`. This will download and launch the [`ccga_dx.config`](../conf/ccga_dx.config) which has been pre-configured with a setup suitable for the CCGA DX cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
|
||||||
|
|
||||||
|
Before running the pipeline you will need to have Nextflow installed.
|
||||||
|
|
||||||
|
>NB: Access to the CCGA DX cluster is restricted to IKMB/CCGA employees. Please talk to Marc Hoeppner to get access (@marchoeppner).
|
|
@ -10,7 +10,7 @@ Before running the pipeline you will need to load Nextflow and Singularity using
|
||||||
## Load Nextflow and Singularity environment modules
|
## Load Nextflow and Singularity environment modules
|
||||||
module purge
|
module purge
|
||||||
module load devel/java_jdk/1.8.0u121
|
module load devel/java_jdk/1.8.0u121
|
||||||
module load qbic/singularity_slurm/3.0.1
|
module load qbic/singularity_slurm/3.0.3
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
128
docs/czbiohub.md
Normal file
128
docs/czbiohub.md
Normal file
|
@ -0,0 +1,128 @@
|
||||||
|
# nf-core/configs: CZ Biohub Configuration
|
||||||
|
|
||||||
|
All nf-core pipelines have been successfully configured for use on AWS Batch at the Chan Zuckerberg Biohub.
|
||||||
|
|
||||||
|
To use, run the pipeline with `-profile czbiohub_aws`. This will download and launch the [`czbiohub_aws.config`](../conf/czbiohub_aws.config) which has been pre-configured with a setup suitable for AWS Batch. Using this profile, a Docker image containing all of the required software will be downloaded and used to execute the pipeline.
|
||||||
|
|
||||||
|
Ask Olga (olga.botvinnik@czbiohub.org) if you have any questions!
|
||||||
|
|
||||||
|
## Run the pipeline from a small AWS EC2 Instance
|
||||||
|
|
||||||
|
The pipeline will monitor and submit jobs to AWS Batch on your behalf. To ensure that the pipeline is successful, it will need to be run from a computer that has a constant internet connection. Unfortunately for us, Biohub has spotty WiFi and even for short pipelines, it is highly recommended to run them from AWS.
|
||||||
|
|
||||||
|
### 1. Start tmux
|
||||||
|
|
||||||
|
[tmux](https://hackernoon.com/a-gentle-introduction-to-tmux-8d784c404340) is a "Terminal Multiplexer" that allows for commands to continue running even when you have closed your laptop. Start a new tmux session with `tmux new` and we'll name this session `nextflow`.
|
||||||
|
|
||||||
|
```
|
||||||
|
tmux new -s nextflow
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can run pipelines with abandon!
|
||||||
|
|
||||||
|
### 2. Make a GitHub repo for your workflows (optional :)
|
||||||
|
|
||||||
|
|
||||||
|
To make sharing your pipelines and commands easy between your teammates, it's best to share code in a GitHub repository. One way is to store the commands in a Makefile ([example](https://github.com/czbiohub/kh-workflows/blob/master/nf-kmer-similarity/Makefile)) which can contain multiple `nextflow run` commands so that you don't need to remember the S3 bucket or output directory for every single one. [Makefiles](https://kbroman.org/minimal_make/) are broadly used in the software community for running many complex commands. Makefiles can have a lot of dependencies and be confusing, so we're only going to write *simple* Makefiles.
|
||||||
|
|
||||||
|
```
|
||||||
|
rnaseq:
|
||||||
|
nextflow run -profile czbiohub_aws nf-core/rnaseq \
|
||||||
|
--reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
|
||||||
|
--genome GRCm38 \
|
||||||
|
--outdir s3://olgabot-maca/nextflow-test/
|
||||||
|
|
||||||
|
human_mouse_zebrafish:
|
||||||
|
nextflow run czbiohub/nf-kmer-similarity -latest -profile aws \
|
||||||
|
--samples s3://kmer-hashing/hematopoeisis/smartseq2/human_mouse_zebrafish/samples.csv
|
||||||
|
|
||||||
|
|
||||||
|
merkin2012_aws:
|
||||||
|
nextflow run czbiohub/nf-kmer-similarity -latest --sra "SRP016501" \
|
||||||
|
-r olgabot/support-csv-directory-or-sra \
|
||||||
|
-profile aws
|
||||||
|
```
|
||||||
|
|
||||||
|
In this example, one would run the `rnaseq` rule and the nextflow command beneath it with:
|
||||||
|
|
||||||
|
```
|
||||||
|
make rnaseq
|
||||||
|
```
|
||||||
|
|
||||||
|
If one wanted to run a different command, e.g. `human_mouse_zebrafish`, they would specify that command instead. For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
make human_mouse_zebrafish
|
||||||
|
```
|
||||||
|
|
||||||
|
Makefiles are a very useful way of storing longer commands with short mnemonic words.
|
||||||
|
|
||||||
|
|
||||||
|
Once you [create a new repository](https://github.com/organizations/czbiohub/repositories/new) (best to initialize with a `.gitignore`, license - MIT and `README`), clone that repository to your EC2 instance. For example, if the repository is called `kh-workflows`, this is what the command would look like:
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://github.com/czbiohub/kh-workflows
|
||||||
|
```
|
||||||
|
|
||||||
|
Now both create and edit a `Makefile`:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd kh-workflows
|
||||||
|
nano Makefile
|
||||||
|
```
|
||||||
|
|
||||||
|
Write your rule name followed by a colon; the command on the next line must be indented with a **tab**, not spaces. Once you're done, exit the program (the `^` command shown in nano means "Control"), write the file, add it to git, commit it, and push it up to GitHub.
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
git add Makefile
|
||||||
|
git commit -m "Added makefile"
|
||||||
|
git push origin master
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### 3. Run your workflow!!
|
||||||
|
|
||||||
|
|
||||||
|
Remember to specify `-profile czbiohub_aws` to grab the CZ Biohub-specific AWS configurations, and an `--outdir` with an AWS S3 bucket so you don't run out of space on your small AMI.
|
||||||
|
|
||||||
|
```
|
||||||
|
nextflow run -profile czbiohub_aws nf-core/rnaseq \
|
||||||
|
--reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
|
||||||
|
--genome GRCm38 \
|
||||||
|
--outdir s3://olgabot-maca/nextflow-test/
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. If you lose connection, how do you restart the jobs?
|
||||||
|
|
||||||
|
If you close your laptop, get onto the train, or lose WiFi connection, you may lose connection to AWS and may need to restart the jobs. To reattach, use the command `tmux attach` and you should see your Nextflow output! To get the named session, use:
|
||||||
|
|
||||||
|
```
|
||||||
|
tmux attach -t nextflow
|
||||||
|
```
|
||||||
|
|
||||||
|
To restart the jobs from where you left off, add the `-resume` flag to your `nextflow` command:
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
nextflow run -profile czbiohub_aws nf-core/rnaseq \
|
||||||
|
--reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
|
||||||
|
--genome GRCm38 \
|
||||||
|
--outdir s3://olgabot-maca/nextflow-test/ \
|
||||||
|
-resume
|
||||||
|
```
|
||||||
|
|
||||||
|
It's important that this command be re-run from the same directory as there is a "hidden" `.nextflow` folder that contains all the metadata and information about previous runs.
|
||||||
|
|
||||||
|
## iGenomes specific configuration
|
||||||
|
|
||||||
|
A local copy of the iGenomes resource has been made available on `s3://czbiohub-reference/igenomes` (in `us-west-2` region) so you should be able to run the pipeline against any reference available in the `igenomes.config` specific to the nf-core pipeline.
|
||||||
|
|
||||||
|
You can do this by simply using the `--genome <GENOME_ID>` parameter.
|
||||||
|
|
||||||
|
For Human and Mouse, we use [GENCODE](https://www.gencodegenes.org/) gene annotations. This doesn't change how you would specify the genome name, only that the pipelines run with the `czbiohub_aws` profile would be with GENCODE rather than iGenomes.
|
||||||
|
|
||||||
|
|
||||||
|
>NB: You will need an account with access to the CZ Biohub AWS Batch environment in order to run the pipeline. If in doubt contact IT.
|
||||||
|
|
||||||
|
>NB: Nextflow will need to submit the jobs via the job scheduler to the HPC cluster and as such the commands above will have to be executed on one of the login nodes. If in doubt contact IT.
|
BIN
docs/images/nfcore-configs_logo.png
Executable file
BIN
docs/images/nfcore-configs_logo.png
Executable file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
|
@ -2,27 +2,15 @@
|
||||||
|
|
||||||
All nf-core pipelines have been successfully configured for use on the MENDEL CLUSTER at the Gregor Mendel Institute (GMI).
|
All nf-core pipelines have been successfully configured for use on the MENDEL CLUSTER at the Gregor Mendel Institute (GMI).
|
||||||
|
|
||||||
To use, run the pipeline with `-profile conda,mendel`. This will download and launch the [`mendel.config`](../conf/mendel.config) which has been pre-configured with a setup suitable for the MENDEL cluster. A Conda environment will be created automatically and software dependencies will be downloaded from ['bioconda'](https://bioconda.github.io/).
|
To use, run the pipeline with `-profile conda,mendel`. This will download and launch the [`mendel.config`](../conf/mendel.config) which has been pre-configured with a setup suitable for the MENDEL cluster. A Conda environment will be created automatically and software dependencies will be resolved via [bioconda](https://bioconda.github.io/).
|
||||||
|
|
||||||
Theoretically, using `-profile singularity,mendel` would download a docker image containing all of the required software, and convert it to a Singularity image before execution of the pipeline. However, there is a regression in the Singularity deployment on MENDEL which renders containers downloaded from public repositories unusable because they lack the /lustre mountpoint.
|
Before running the pipeline you will need to load Conda using the environment module system on MENDEL. You can do this by issuing the commands below:
|
||||||
|
|
||||||
If you want to run the pipeline containerized anyway you will have to build the image yourself (on a machine where you have root access) using the provided `Singularity` file in the pipeline repository:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd /path/to/pipeline-repository
|
|
||||||
echo 'mkdir /lustre > Singularity'
|
|
||||||
singularity build nf-core-methylseq-custom.simg Singularity
|
|
||||||
```
|
|
||||||
|
|
||||||
After you copied the container image to the cluster filesystem, make sure to pass the path to the image to the pipeline with `-with-singularity /path/to/nf-core-methylseq-custom.simg`
|
|
||||||
|
|
||||||
Before running the pipeline you will need to load Nextflow and Conda using the environment module system on MENDEL. You can do this by issuing the commands below:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
## Load Nextflow and Conda environment modules
|
## Load Nextflow and Conda environment modules
|
||||||
module purge
|
module purge
|
||||||
module load Nextflow
|
module load Nextflow
|
||||||
module load Miniconda3 # not needed if using Singularity
|
module load Miniconda3/4.6.7
|
||||||
```
|
```
|
||||||
|
|
||||||
>NB: You will need an account to use the HPC cluster in order to run the pipeline. If in doubt contact the HPC team.
|
>NB: You will need an account to use the HPC cluster in order to run the pipeline. If in doubt contact the HPC team.
|
||||||
|
|
56
docs/pasteur.md
Normal file
56
docs/pasteur.md
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
# nf-core/configs: Institut Pasteur Configuration
|
||||||
|
|
||||||
|
All nf-core pipelines have been successfully configured for use on the tars cluster at the Institut Pasteur.
|
||||||
|
|
||||||
|
To use, run the pipeline with `-profile pasteur`. This will download and launch the [`pasteur.config`](../conf/pasteur.config) which has been pre-configured with a setup suitable for the Pasteur cluster. Using this profile, a docker image containing all of the required software will be downloaded, and converted to a Singularity image before execution of the pipeline.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Running the workflow on the Pasteur cluster
|
||||||
|
|
||||||
|
Nextflow is not installed by default on the Pasteur cluster.
|
||||||
|
- Install Nextflow : [here](https://www.nextflow.io/docs/latest/getstarted.html#)
|
||||||
|
|
||||||
|
|
||||||
|
Nextflow manages each process as a separate job that is submitted to the cluster by using the sbatch command.
|
||||||
|
Nextflow shouldn't run directly on the submission node but on a compute node.
|
||||||
|
The compute nodes don't have access to internet so you need to run it offline.
|
||||||
|
|
||||||
|
To do that:
|
||||||
|
1. Create a virtualenv to install nf-core
|
||||||
|
```bash
|
||||||
|
module purge
|
||||||
|
module load Python/3.6.0
|
||||||
|
module load java
|
||||||
|
module load singularity
|
||||||
|
cd /path/to/nf-core/workflows
|
||||||
|
virtualenv .venv -p python3
|
||||||
|
. .venv/bin/activate
|
||||||
|
```
|
||||||
|
2. Install nf-core: [here](https://nf-co.re/tools#installation)
|
||||||
|
3. Get nf-core pipeline and container: [here](https://nf-co.re/tools#downloading-pipelines-for-offline-use)
|
||||||
|
4. Get the nf-core Pasteur profile: [here](https://github.com/nf-core/rnaseq/blob/master/docs/usage.md#--custom_config_base)
|
||||||
|
5. Run nextflow on a compute node:
|
||||||
|
```bash
|
||||||
|
# create a terminal
|
||||||
|
tmux
|
||||||
|
|
||||||
|
# Get a compute node
|
||||||
|
salloc
|
||||||
|
|
||||||
|
# Load the dependencies if not done before
|
||||||
|
module purge
|
||||||
|
module load java
|
||||||
|
module load singularity
|
||||||
|
|
||||||
|
# Run nextflow workflow
|
||||||
|
nextflow run \
|
||||||
|
/path/to/pipeline-dir/from/step/3/workflow \
|
||||||
|
-resume \
|
||||||
|
-profile pasteur \
|
||||||
|
-with-singularity /path/to/pipeline-dir/from/step/3/singularity-images/singularity.img \
|
||||||
|
--email my-email@pasteur.fr \
|
||||||
|
--custom_config_base /path/to/configs/from/step/4/ \
|
||||||
|
-c my-specific.config
|
||||||
|
...
|
||||||
|
```
|
|
@ -15,8 +15,14 @@ profiles {
|
||||||
aquila { includeConfig "${params.custom_config_base}/conf/aquila.config" }
|
aquila { includeConfig "${params.custom_config_base}/conf/aquila.config" }
|
||||||
binac { includeConfig "${params.custom_config_base}/conf/binac.config" }
|
binac { includeConfig "${params.custom_config_base}/conf/binac.config" }
|
||||||
ccga { includeConfig "${params.custom_config_base}/conf/ccga.config" }
|
ccga { includeConfig "${params.custom_config_base}/conf/ccga.config" }
|
||||||
|
ccga_dx { includeConfig "${params.custom_config_base}/conf/ccga_dx.config" }
|
||||||
cfc { includeConfig "${params.custom_config_base}/conf/cfc.config" }
|
cfc { includeConfig "${params.custom_config_base}/conf/cfc.config" }
|
||||||
crick { includeConfig "${params.custom_config_base}/conf/crick.config" }
|
crick { includeConfig "${params.custom_config_base}/conf/crick.config" }
|
||||||
|
czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
|
||||||
|
czbiohub_aws_highpriority {
|
||||||
|
includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config"
|
||||||
|
includeConfig "${params.custom_config_base}/conf/czbiohub_aws_highpriority.config"
|
||||||
|
}
|
||||||
gis { includeConfig "${params.custom_config_base}/conf/gis.config" }
|
gis { includeConfig "${params.custom_config_base}/conf/gis.config" }
|
||||||
hebbe { includeConfig "${params.custom_config_base}/conf/hebbe.config" }
|
hebbe { includeConfig "${params.custom_config_base}/conf/hebbe.config" }
|
||||||
mendel { includeConfig "${params.custom_config_base}/conf/mendel.config" }
|
mendel { includeConfig "${params.custom_config_base}/conf/mendel.config" }
|
||||||
|
@ -29,6 +35,7 @@ profiles {
|
||||||
uzh { includeConfig "${params.custom_config_base}/conf/uzh.config" }
|
uzh { includeConfig "${params.custom_config_base}/conf/uzh.config" }
|
||||||
prince { includeConfig "${params.custom_config_base}/conf/prince.config" }
|
prince { includeConfig "${params.custom_config_base}/conf/prince.config" }
|
||||||
bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" }
|
bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" }
|
||||||
|
pasteur { includeConfig "${params.custom_config_base}/conf/pasteur.config" }
|
||||||
}
|
}
|
||||||
|
|
||||||
// If user hostnames contain one of these substring and they are
|
// If user hostnames contain one of these substring and they are
|
||||||
|
|
Loading…
Reference in a new issue