From acad9ac7e5eb850361a4797000199641ef29b3e9 Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Mon, 24 Jun 2019 17:50:36 -0700
Subject: [PATCH 1/9] Try to clean up czb update

---
 conf/czbiohub_aws.config | 123 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 conf/czbiohub_aws.config

diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config
new file mode 100644
index 0000000..5c47c12
--- /dev/null
+++ b/conf/czbiohub_aws.config
@@ -0,0 +1,123 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for Chan Zuckerberg Biohub
+ * -------------------------------------------------
+ * Defines AWS Batch settings and reference genomes
+ * (GENCODE paths on S3). Imported under the
+ * 'czbiohub_aws' profile in nfcore_custom.config
+ */
+
+ //Profile config names for nf-core/configs
+ params {
+   config_profile_description = 'Chan Zuckerberg Biohub AWS Batch profile provided by nf-core/configs.'
+   config_profile_contact = 'Olga Botvinnik (@olgabot)'
+   config_profile_url = 'https://www.czbiohub.org/'
+ }
+
+docker {
+  enabled = true
+}
+
+process {
+  executor = 'awsbatch'
+  queue = 'nextflow'
+  errorStrategy = 'ignore'
+}
+
+workDir = "s3://czb-nextflow/intermediates/"
+
+aws.region = 'us-west-2'
+executor.awscli = '/home/ec2-user/miniconda/bin/aws'
+params.tracedir = './'
+
+params {
+  saveReference = true
+
+  // Largest SPOT instances available on AWS: https://ec2instances.info/
+  max_memory = 1952.GB
+  max_cpus = 96
+  max_time = 240.h
+
+  seq_center = "czbiohub"
+
+  // illumina iGenomes reference file paths on CZ Biohub reference s3 bucket
+  // No final slash because it's added later
+  igenomes_base = "s3://czbiohub-reference/igenomes"
+
+  // GENCODE (human + mouse) reference file paths on CZ Biohub reference s3 bucket
+  // No final slash because it's added later
+  gencode_base = "s3://czbiohub-reference/gencode"
+  transgenes_base = "s3://czbiohub-reference/transgenes"
+
+  // AWS configurations
+  awsregion = "us-west-2"
+  awsqueue = "nextflow"
+
+  igenomesIgnore = true
+
+  fc_extra_attributes = 'gene_name'
+  fc_group_features = 'gene_id'
+  fc_group_features_type = 'gene_type'
+
+  // GENCODE GTF and fasta files
+  genomes {
+    'GRCh38' {
+      fasta             = "${params.gencode_base}/human/v30/GRCh38.p12.genome.ERCC92.fa"
+      gtf               = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gene_type.gtf"
+      transcript_fasta  = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa"
+      star              = "${params.gencode_base}/human/v30/STARIndex/"
+    }
+    'GRCm38' {
+      fasta             = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa"
+      gtf               = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gene_type.gtf"
+      transcript_fasta  = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa"
+    }
+  }
+
+  transgenes {
+    'ChR2' {
+      fasta   = "${params.transgenes_base}/ChR2/ChR2.fa"
+      gtf     = "${params.transgenes_base}/ChR2/ChR2.gtf"
+    }
+    'Cre' {
+      fasta   = "${params.transgenes_base}/Cre/Cre.fa"
+      gtf     = "${params.transgenes_base}/Cre/Cre.gtf"
+    }
+    'ERCC' {
+      fasta   = "${params.transgenes_base}/ERCC92/ERCC92.fa"
+      gtf     = "${params.transgenes_base}/ERCC92/ERCC92.gtf"
+    }
+    'GCaMP6m' {
+      fasta   = "${params.transgenes_base}/GCaMP6m/GCaMP6m.fa"
+      gtf     = "${params.transgenes_base}/GCaMP6m/GCaMP6m.gtf"
+    }
+    'GFP' {
+      fasta   = "${params.transgenes_base}/Gfp/Gfp.fa"
+      gtf     = "${params.transgenes_base}/Gfp/Gfp.gtf"
+    }
+    'NpHR' {
+      fasta   = "${params.transgenes_base}/NpHR/NpHR.fa"
+      gtf     = "${params.transgenes_base}/NpHR/NpHR.gtf"
+    }
+    'RCaMP' {
+      fasta   = "${params.transgenes_base}/RCaMP/RCaMP.fa"
+      gtf     = "${params.transgenes_base}/RCaMP/RCaMP.gtf"
+    }
+    'RGECO' {
+      fasta   = "${params.transgenes_base}/RGECO/RGECO.fa"
+      gtf     = "${params.transgenes_base}/RGECO/RGECO.gtf"
+    }
+    'Tdtom' {
+      fasta   = "${params.transgenes_base}/Tdtom/Tdtom.fa"
+      gtf     = "${params.transgenes_base}/Tdtom/Tdtom.gtf"
+    }
+    'Car-T' {
+      fasta   = "${params.transgenes_base}/car-t/car-t.fa"
+      gtf     = "${params.transgenes_base}/car-t/car-t.gtf"
+    }
+    'zsGreen' {
+      fasta   = "${params.transgenes_base}/zsGreen/zsGreen.fa"
+      gtf     = "${params.transgenes_base}/zsGreen/zsGreen.gtf"
+    }
+  }
+}

From b0c67e0a9b45112c570b0a9856a4ed479190ab13 Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Mon, 24 Jun 2019 17:52:10 -0700
Subject: [PATCH 2/9] Add both seqCenter and seq_center

---
 conf/czbiohub_aws.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config
index 5c47c12..af7707a 100644
--- a/conf/czbiohub_aws.config
+++ b/conf/czbiohub_aws.config
@@ -38,7 +38,9 @@ params {
   max_cpus = 96
   max_time = 240.h
 
+  // Compatible with multiple versions of rnaseq pipeline
   seq_center = "czbiohub"
+  seqCenter = "czbiohub"
 
   // illumina iGenomes reference file paths on CZ Biohub reference s3 bucket
   // No final slash because it's added later

From b82d68c009a4deb4c193bd440982381c4ba8556d Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Mon, 24 Jun 2019 17:55:01 -0700
Subject: [PATCH 3/9] Add docs

---
 docs/czbiohub.md | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 docs/czbiohub.md

diff --git a/docs/czbiohub.md b/docs/czbiohub.md
new file mode 100644
index 0000000..5e98f4f
--- /dev/null
+++ b/docs/czbiohub.md
@@ -0,0 +1,125 @@
+# nf-core/configs: CZ Biohub Configuration
+
+All nf-core pipelines have been successfully configured for use on AWS Batch at the Chan Zuckerberg Biohub.
+
+To use, run the pipeline with `-profile czbiohub_aws`. This will download and launch the [`czbiohub_aws.config`](../conf/czbiohub_aws.config) which has been pre-configured with a setup suitable for AWS Batch. Using this profile, a Docker image containing all of the required software will be downloaded and used to run each process of the pipeline on AWS Batch.
+
+Ask Olga (olga.botvinnik@czbiohub.org) if you have any questions!
+
+## Run the pipeline from a small AWS EC2 Instance
+
+The pipeline will monitor and submit jobs to AWS Batch on your behalf. To ensure that the pipeline is successful, it will need to be run from a computer that has constant internet connection. Unfortunately for us, Biohub has spotty WiFi and even for short pipelines, it is highly recommended to run them from AWS.
+
+### 1. Start tmux
+
+[tmux](https://hackernoon.com/a-gentle-introduction-to-tmux-8d784c404340) is a "Terminal Multiplexer" that allows for commands to continue running even when you have closed your laptop. Start a new tmux session with `tmux new` and we'll name this session `nextflow`.
+
+```
+tmux new -s nextflow
+```
+
+Now you can run pipelines with abandon!
+
+### 2. Make a GitHub repo for your workflows (optional :)
+
+To make sharing your pipelines and commands easy between your teammates, it's best to share code in a GitHub repository. One way is to store the commands in a Makefile ([example](https://github.com/czbiohub/kh-workflows/blob/master/nf-kmer-similarity/Makefile)) which can contain multiple `nextflow run` commands so that you don't need to remember the S3 bucket or output directory for every single one. [Makefiles](https://kbroman.org/minimal_make/) are broadly used in the software community for running many complex commands. Makefiles can have a lot of dependencies and be confusing, so we're only going to write *simple* Makefiles.
+
+```
+rnaseq:
+	nextflow run -profile czbiohub_aws nf-core/rnaseq \
+		  --reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
+		  --genome GRCm38 \
+		  --outdir s3://olgabot-maca/nextflow-test/
+
+human_mouse_zebrafish:
+	nextflow run czbiohub/nf-kmer-similarity -latest -profile aws \
+		  --samples s3://kmer-hashing/hematopoeisis/smartseq2/human_mouse_zebrafish/samples.csv
+
+
+merkin2012_aws:
+	nextflow run czbiohub/nf-kmer-similarity -latest --sra "SRP016501" \
+		  -r olgabot/support-csv-directory-or-sra \
+		    -profile aws
+```
+
+In this example, one would run the `rnaseq` rule and the nextflow command beneath it with:
+
+```
+make rnaseq
+```
+
+If one wanted to run a different command, e.g. `human_mouse_zebrafish`, they would specify that command instead. For example:
+
+```
+make human_mouse_zebrafish
+```
+
+Makefiles are a very useful way of storing longer commands with short mnemonic words.
+
+
+Once you [create a new repository](https://github.com/organizations/czbiohub/repositories/new) (best to initialize with a `.gitignore`, license - MIT and `README`), clone that repository to your EC2 instance. For example, if the repository is called `kh-workflows`, this is what the command would look like:
+
+```
+git clone https://github.com/czbiohub/kh-workflows
+```
+
+Now change into the cloned repository, then create and edit a `Makefile`:
+
+```
+cd kh-workflows
+nano Makefile
+```
+
+Write your rule name followed by a colon, and start the next line with a **tab**, not spaces. Once you're done, write the file and exit the program (the `^` symbol shown in nano means "Control"), then add it to git, commit it, and push it up to GitHub.
+
+
+```
+git add Makefile
+git commit -m "Added makefile"
+git push origin master
+```
+
+
+### 3. Run your workflow!!
+
+Remember to specify `-profile czbiohub_aws` to grab the CZ Biohub-specific AWS configurations, and an `--outdir` with an AWS S3 bucket so you don't run out of space on your small AMI
+
+```
+nextflow run -profile czbiohub_aws nf-core/rnaseq \
+    --reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
+    --genome GRCm38 \
+    --outdir s3://olgabot-maca/nextflow-test/
+```
+
+### 4. If you lose connection, how do you restart the jobs?
+
+If you close your laptop, get onto the train, or lose WiFi connection, you may lose connection to AWS and may need to restart the jobs. To reattach, use the command `tmux attach` and you should see your Nextflow output! To get the named session, use:
+
+```
+tmux attach -t nextflow
+```
+
+To restart the jobs from where you left off, add the `-resume` flag to your `nextflow` command:
+
+
+```
+nextflow run -profile czbiohub_aws nf-core/rnaseq \
+    --reads 's3://czb-maca/Plate_seq/24_month/180626_A00111_0166_BH5LNVDSXX/fastqs/*{R1,R2}*.fastq.gz' \
+    --genome GRCm38 \
+    --outdir s3://olgabot-maca/nextflow-test/ \
+    -resume
+```
+
+It's important that this command be re-run from the same directory as there is a "hidden" `.nextflow` folder that contains all the metadata and information about previous runs.
+
+## iGenomes specific configuration
+
+A local copy of the iGenomes resource has been made available on `s3://czbiohub-reference/igenomes` (in `us-west-2` region) so you should be able to run the pipeline against any reference available in the `igenomes.config` specific to the nf-core pipeline.
+You can do this by simply using the `--genome <GENOME_ID>` parameter.
+
+For Human and Mouse, we use [GENCODE](https://www.gencodegenes.org/) gene annotations. This doesn't change how you would specify the genome name, only that the pipelines run with the `czbiohub_aws` profile would be with GENCODE rather than iGenomes.
+
+
+>NB: You will need AWS credentials with access to the CZ Biohub AWS Batch queues and S3 buckets in order to run the pipeline. If in doubt contact IT.
+
+>NB: Nextflow will need to submit the jobs to AWS Batch and as such the commands above will have to be executed from a machine with the AWS CLI configured and a stable internet connection (e.g. a small EC2 instance, as described above). If in doubt contact IT.

From cfcb533b959d5fb2c4deb5750c6bb5645d5db009 Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Tue, 25 Jun 2019 19:02:49 -0700
Subject: [PATCH 4/9] Add czbiohub_aws to list of configs

---
 nfcore_custom.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nfcore_custom.config b/nfcore_custom.config
index dfcb4d2..0bfc474 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -16,6 +16,7 @@ profiles {
   ccga         { includeConfig "${params.custom_config_base}/conf/ccga.config" }
   cfc          { includeConfig "${params.custom_config_base}/conf/cfc.config" }
   crick        { includeConfig "${params.custom_config_base}/conf/crick.config" }
+  czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
   gis          { includeConfig "${params.custom_config_base}/conf/gis.config" }
   hebbe        { includeConfig "${params.custom_config_base}/conf/hebbe.config" }
   mendel       { includeConfig "${params.custom_config_base}/conf/mendel.config" }

From d1983f8acd15354160e60458355eee2cf75f9b56 Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Wed, 26 Jun 2019 17:56:26 -0700
Subject: [PATCH 5/9] Use new and improved GENCODE+ERCC annotation

---
 conf/czbiohub_aws.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config
index af7707a..a89588b 100644
--- a/conf/czbiohub_aws.config
+++ b/conf/czbiohub_aws.config
@@ -65,13 +65,13 @@ params {
   genomes {
     'GRCh38' {
       fasta             = "${params.gencode_base}/human/v30/GRCh38.p12.genome.ERCC92.fa"
-      gtf               = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gene_type.gtf"
+      gtf               = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gtf"
       transcript_fasta  = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa"
       star              = "${params.gencode_base}/human/v30/STARIndex/"
     }
     'GRCm38' {
       fasta             = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa"
-      gtf               = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gene_type.gtf"
+      gtf               = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gtf"
       transcript_fasta  = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa"
     }
   }

From 70b5205d153f4313bdf586837d0eb81e8b5f2607 Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Thu, 27 Jun 2019 15:02:47 -0700
Subject: [PATCH 6/9] Add human salmon index and mouse star index

---
 conf/czbiohub_aws.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config
index a89588b..bd0228d 100644
--- a/conf/czbiohub_aws.config
+++ b/conf/czbiohub_aws.config
@@ -68,11 +68,13 @@ params {
       gtf               = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gtf"
       transcript_fasta  = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa"
       star              = "${params.gencode_base}/human/v30/STARIndex/"
+      salmon_index      = "${params.gencode_base}/human/v30/salmon_index/"
     }
     'GRCm38' {
       fasta             = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa"
       gtf               = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gtf"
       transcript_fasta  = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa"
+      star              = "${params.gencode_base}/mouse/vM21/STARIndex/"
     }
   }
 

From 6a2d2003f0fed491ccabc130c36e6301616983fa Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Mon, 1 Jul 2019 09:25:46 -0700
Subject: [PATCH 7/9] Add high priority AWS batch queue to CZBiohub AWS

---
 conf/czbiohub_aws.config              |   2 +-
 conf/czbiohub_aws_highpriority.config | 127 ++++++++++++++++++++++++++
 nfcore_custom.config                  |   1 +
 3 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 conf/czbiohub_aws_highpriority.config

diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config
index bd0228d..d20b405 100644
--- a/conf/czbiohub_aws.config
+++ b/conf/czbiohub_aws.config
@@ -20,7 +20,7 @@ docker {
 
 process {
   executor = 'awsbatch'
-  queue = 'nextflow'
+  queue = 'default-971039e0-830c-11e9-9e0b-02c5b84a8036'
   errorStrategy = 'ignore'
 }
 
diff --git a/conf/czbiohub_aws_highpriority.config b/conf/czbiohub_aws_highpriority.config
new file mode 100644
index 0000000..2bf9a4d
--- /dev/null
+++ b/conf/czbiohub_aws_highpriority.config
@@ -0,0 +1,127 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for Chan Zuckerberg Biohub
+ * -------------------------------------------------
+ * Defines reference genomes, using iGenome paths
+ * Imported under the default 'standard' Nextflow
+ * profile in nextflow.config
+ */
+
+ //Profile config names for nf-core/configs
+ params {
+   config_profile_description = 'Chan Zuckerberg Biohub AWS Batch profile provided by nf-core/configs.'
+   config_profile_contact = 'Olga Botvinnik (@olgabot)'
+   config_profile_url = 'https://www.czbiohub.org/'
+ }
+
+docker {
+  enabled = true
+}
+
+process {
+  executor = 'awsbatch'
+  queue = 'highpriority-971039e0-830c-11e9-9e0b-02c5b84a8036'
+  errorStrategy = 'ignore'
+}
+
+workDir = "s3://czb-nextflow/intermediates/"
+
+aws.region = 'us-west-2'
+executor.awscli = '/home/ec2-user/miniconda/bin/aws'
+params.tracedir = './'
+
+params {
+  saveReference = true
+
+  // Largest SPOT instances available on AWS: https://ec2instances.info/
+  max_memory = 1952.GB
+  max_cpus = 96
+  max_time = 240.h
+
+  // Compatible with multiple versions of rnaseq pipeline
+  seq_center = "czbiohub"
+  seqCenter = "czbiohub"
+
+  // illumina iGenomes reference file paths on CZ Biohub reference s3 bucket
+  // No final slash because it's added later
+  igenomes_base = "s3://czbiohub-reference/igenomes"
+
+  // GENCODE (human + mouse) reference file paths on CZ Biohub reference s3 bucket
+  // No final slash because it's added later
+  gencode_base = "s3://czbiohub-reference/gencode"
+  transgenes_base = "s3://czbiohub-reference/transgenes"
+
+  // AWS configurations
+  awsregion = "us-west-2"
+  awsqueue = "nextflow"
+
+  igenomesIgnore = true
+
+  fc_extra_attributes = 'gene_name'
+  fc_group_features = 'gene_id'
+  fc_group_features_type = 'gene_type'
+
+  // GENCODE GTF and fasta files
+  genomes {
+    'GRCh38' {
+      fasta             = "${params.gencode_base}/human/v30/GRCh38.p12.genome.ERCC92.fa"
+      gtf               = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gtf"
+      transcript_fasta  = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa"
+      star              = "${params.gencode_base}/human/v30/STARIndex/"
+      salmon_index      = "${params.gencode_base}/human/v30/salmon_index/"
+    }
+    'GRCm38' {
+      fasta             = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa"
+      gtf               = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gtf"
+      transcript_fasta  = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa"
+      start             = "${params.gencode_base}/mouse/vM21/STARIndex/"
+    }
+  }
+
+  transgenes {
+    'ChR2' {
+      fasta   = "${params.transgenes_base}/ChR2/ChR2.fa"
+      gtf     = "${params.transgenes_base}/ChR2/ChR2.gtf"
+    }
+    'Cre' {
+      fasta   = "${params.transgenes_base}/Cre/Cre.fa"
+      gtf     = "${params.transgenes_base}/Cre/Cre.gtf"
+    }
+    'ERCC' {
+      fasta   = "${params.transgenes_base}/ERCC92/ERCC92.fa"
+      gtf     = "${params.transgenes_base}/ERCC92/ERCC92.gtf"
+    }
+    'GCaMP6m' {
+      fasta   = "${params.transgenes_base}/GCaMP6m/GCaMP6m.fa"
+      gtf     = "${params.transgenes_base}/GCaMP6m/GCaMP6m.gtf"
+    }
+    'GFP' {
+      fasta   = "${params.transgenes_base}/Gfp/Gfp.fa"
+      gtf     = "${params.transgenes_base}/Gfp/Gfp.gtf"
+    }
+    'NpHR' {
+      fasta   = "${params.transgenes_base}/NpHR/NpHR.fa"
+      gtf     = "${params.transgenes_base}/NpHR/NpHR.gtf"
+    }
+    'RCaMP' {
+      fasta   = "${params.transgenes_base}/RCaMP/RCaMP.fa"
+      gtf     = "${params.transgenes_base}/RCaMP/RCaMP.gtf"
+    }
+    'RGECO' {
+      fasta   = "${params.transgenes_base}/RGECO/RGECO.fa"
+      gtf     = "${params.transgenes_base}/RGECO/RGECO.gtf"
+    }
+    'Tdtom' {
+      fasta   = "${params.transgenes_base}/Tdtom/Tdtom.fa"
+      gtf     = "${params.transgenes_base}/Tdtom/Tdtom.gtf"
+    }
+    'Car-T' {
+      fasta   = "${params.transgenes_base}/car-t/car-t.fa"
+      gtf     = "${params.transgenes_base}/car-t/car-t.gtf"
+    }
+    'zsGreen' {
+      fasta   = "${params.transgenes_base}/zsGreen/zsGreen.fa"
+      gtf     = "${params.transgenes_base}/zsGreen/zsGreen.gtf"
+    }
+  }
+}
diff --git a/nfcore_custom.config b/nfcore_custom.config
index 0bfc474..de5961c 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -17,6 +17,7 @@ profiles {
   cfc          { includeConfig "${params.custom_config_base}/conf/cfc.config" }
   crick        { includeConfig "${params.custom_config_base}/conf/crick.config" }
   czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
+  czbiohub_aws_highpriority { includeConfig "${params.custom_config_base}/conf/czbiohub_aws_highpriority.config" }
   gis          { includeConfig "${params.custom_config_base}/conf/gis.config" }
   hebbe        { includeConfig "${params.custom_config_base}/conf/hebbe.config" }
   mendel       { includeConfig "${params.custom_config_base}/conf/mendel.config" }

From 917ec0863d5e427940ec8a19ecdc258df786aef1 Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Wed, 3 Jul 2019 16:01:03 -0700
Subject: [PATCH 8/9] add default trimming

---
 conf/czbiohub_aws.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/czbiohub_aws.config b/conf/czbiohub_aws.config
index d20b405..71d1a9b 100644
--- a/conf/czbiohub_aws.config
+++ b/conf/czbiohub_aws.config
@@ -61,6 +61,8 @@ params {
   fc_group_features = 'gene_id'
   fc_group_features_type = 'gene_type'
 
+  trim_pattern = '_+S\\d+'
+
   // GENCODE GTF and fasta files
   genomes {
     'GRCh38' {

From 069963ff8a5ce9f6cad3a7be71e44782f3417ee4 Mon Sep 17 00:00:00 2001
From: Olga Botvinnik <olga.botvinnik@gmail.com>
Date: Wed, 3 Jul 2019 16:01:47 -0700
Subject: [PATCH 9/9] Make czbiohub_aws_highpriority only set the AWS Batch
 queue and nothing else

---
 conf/czbiohub_aws_highpriority.config | 115 --------------------------
 nfcore_custom.config                  |   5 +-
 2 files changed, 4 insertions(+), 116 deletions(-)

diff --git a/conf/czbiohub_aws_highpriority.config b/conf/czbiohub_aws_highpriority.config
index 2bf9a4d..5ab796a 100644
--- a/conf/czbiohub_aws_highpriority.config
+++ b/conf/czbiohub_aws_highpriority.config
@@ -7,121 +7,6 @@
  * profile in nextflow.config
  */
 
- //Profile config names for nf-core/configs
- params {
-   config_profile_description = 'Chan Zuckerberg Biohub AWS Batch profile provided by nf-core/configs.'
-   config_profile_contact = 'Olga Botvinnik (@olgabot)'
-   config_profile_url = 'https://www.czbiohub.org/'
- }
-
-docker {
-  enabled = true
-}
-
 process {
-  executor = 'awsbatch'
   queue = 'highpriority-971039e0-830c-11e9-9e0b-02c5b84a8036'
-  errorStrategy = 'ignore'
-}
-
-workDir = "s3://czb-nextflow/intermediates/"
-
-aws.region = 'us-west-2'
-executor.awscli = '/home/ec2-user/miniconda/bin/aws'
-params.tracedir = './'
-
-params {
-  saveReference = true
-
-  // Largest SPOT instances available on AWS: https://ec2instances.info/
-  max_memory = 1952.GB
-  max_cpus = 96
-  max_time = 240.h
-
-  // Compatible with multiple versions of rnaseq pipeline
-  seq_center = "czbiohub"
-  seqCenter = "czbiohub"
-
-  // illumina iGenomes reference file paths on CZ Biohub reference s3 bucket
-  // No final slash because it's added later
-  igenomes_base = "s3://czbiohub-reference/igenomes"
-
-  // GENCODE (human + mouse) reference file paths on CZ Biohub reference s3 bucket
-  // No final slash because it's added later
-  gencode_base = "s3://czbiohub-reference/gencode"
-  transgenes_base = "s3://czbiohub-reference/transgenes"
-
-  // AWS configurations
-  awsregion = "us-west-2"
-  awsqueue = "nextflow"
-
-  igenomesIgnore = true
-
-  fc_extra_attributes = 'gene_name'
-  fc_group_features = 'gene_id'
-  fc_group_features_type = 'gene_type'
-
-  // GENCODE GTF and fasta files
-  genomes {
-    'GRCh38' {
-      fasta             = "${params.gencode_base}/human/v30/GRCh38.p12.genome.ERCC92.fa"
-      gtf               = "${params.gencode_base}/human/v30/gencode.v30.annotation.ERCC92.gtf"
-      transcript_fasta  = "${params.gencode_base}/human/v30/gencode.v30.transcripts.ERCC92.fa"
-      star              = "${params.gencode_base}/human/v30/STARIndex/"
-      salmon_index      = "${params.gencode_base}/human/v30/salmon_index/"
-    }
-    'GRCm38' {
-      fasta             = "${params.gencode_base}/mouse/vM21/GRCm38.p6.genome.ERCC92.fa"
-      gtf               = "${params.gencode_base}/mouse/vM21/gencode.vM21.annotation.ERCC92.gtf"
-      transcript_fasta  = "${params.gencode_base}/mouse/vM21/gencode.vM21.transcripts.ERCC92.fa"
-      start             = "${params.gencode_base}/mouse/vM21/STARIndex/"
-    }
-  }
-
-  transgenes {
-    'ChR2' {
-      fasta   = "${params.transgenes_base}/ChR2/ChR2.fa"
-      gtf     = "${params.transgenes_base}/ChR2/ChR2.gtf"
-    }
-    'Cre' {
-      fasta   = "${params.transgenes_base}/Cre/Cre.fa"
-      gtf     = "${params.transgenes_base}/Cre/Cre.gtf"
-    }
-    'ERCC' {
-      fasta   = "${params.transgenes_base}/ERCC92/ERCC92.fa"
-      gtf     = "${params.transgenes_base}/ERCC92/ERCC92.gtf"
-    }
-    'GCaMP6m' {
-      fasta   = "${params.transgenes_base}/GCaMP6m/GCaMP6m.fa"
-      gtf     = "${params.transgenes_base}/GCaMP6m/GCaMP6m.gtf"
-    }
-    'GFP' {
-      fasta   = "${params.transgenes_base}/Gfp/Gfp.fa"
-      gtf     = "${params.transgenes_base}/Gfp/Gfp.gtf"
-    }
-    'NpHR' {
-      fasta   = "${params.transgenes_base}/NpHR/NpHR.fa"
-      gtf     = "${params.transgenes_base}/NpHR/NpHR.gtf"
-    }
-    'RCaMP' {
-      fasta   = "${params.transgenes_base}/RCaMP/RCaMP.fa"
-      gtf     = "${params.transgenes_base}/RCaMP/RCaMP.gtf"
-    }
-    'RGECO' {
-      fasta   = "${params.transgenes_base}/RGECO/RGECO.fa"
-      gtf     = "${params.transgenes_base}/RGECO/RGECO.gtf"
-    }
-    'Tdtom' {
-      fasta   = "${params.transgenes_base}/Tdtom/Tdtom.fa"
-      gtf     = "${params.transgenes_base}/Tdtom/Tdtom.gtf"
-    }
-    'Car-T' {
-      fasta   = "${params.transgenes_base}/car-t/car-t.fa"
-      gtf     = "${params.transgenes_base}/car-t/car-t.gtf"
-    }
-    'zsGreen' {
-      fasta   = "${params.transgenes_base}/zsGreen/zsGreen.fa"
-      gtf     = "${params.transgenes_base}/zsGreen/zsGreen.gtf"
-    }
-  }
 }
diff --git a/nfcore_custom.config b/nfcore_custom.config
index de5961c..72dca30 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -17,7 +17,10 @@ profiles {
   cfc          { includeConfig "${params.custom_config_base}/conf/cfc.config" }
   crick        { includeConfig "${params.custom_config_base}/conf/crick.config" }
   czbiohub_aws { includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config" }
-  czbiohub_aws_highpriority { includeConfig "${params.custom_config_base}/conf/czbiohub_aws_highpriority.config" }
+  czbiohub_aws_highpriority {
+    includeConfig "${params.custom_config_base}/conf/czbiohub_aws.config"
+    includeConfig "${params.custom_config_base}/conf/czbiohub_aws_highpriority.config"
+   }
   gis          { includeConfig "${params.custom_config_base}/conf/gis.config" }
   hebbe        { includeConfig "${params.custom_config_base}/conf/hebbe.config" }
   mendel       { includeConfig "${params.custom_config_base}/conf/mendel.config" }