From 7b08c49cd6cb6471384a26c1202733cad0fe58ae Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Fri, 8 Apr 2022 11:54:54 +0200
Subject: [PATCH] Re-add run merging and gonna let GHA see if it works >.>

---
 .github/workflows/ci.yml |  1 +
 nextflow.config          |  2 ++
 workflows/taxprofiler.nf | 25 +++++++++++++++++++++++--
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 79148f0..7678645 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -39,6 +39,7 @@ jobs:
           - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
           - "--shortread_complexityfilter_tool bbduk"
           - "--shortread_complexityfilter_tool prinseq"
+          - "--run_merging"
 
     steps:
       - name: Check out pipeline code
diff --git a/nextflow.config b/nextflow.config
index 19cc823..1c69d36 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -76,6 +76,8 @@ params {
     shortread_complexityfilter_prinseqplusplus_dustscore = 0.5
     save_complexityfiltered_reads                        = false
 
+    // Run merging (concatenate reads across runs before profiling)
+    run_merging                = false
 
     // MALT
     run_malt                   = false
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 3b08402..61eda6e 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -125,13 +125,34 @@ workflow TAXPROFILER {
         ch_shortreads_filtered = ch_shortreads_preprocessed
     }
 
+    /*
+        STEP: Run merging (concatenate reads of runs that share the same remaining meta)
+    */
+
+    if ( params.run_merging ) {
+        ch_reads_for_cat = ch_shortreads_filtered
+            .mix( ch_longreads_preprocessed )
+            .map {
+                meta, reads ->
+                    def meta_new = meta.clone() // work on a copy so the upstream meta map is not mutated
+                    meta_new.remove('run_accession') // Map.remove(key): drop the key so runs group together
+                    [ meta_new, reads ]
+            }
+            .groupTuple() // groups on the first tuple element, i.e. the meta without run_accession
+
+        ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ).reads // NOTE(review): assumes CAT_FASTQ has a 'reads' emit - confirm
+
+    } else {
+        ch_reads_runmerged = ch_shortreads_filtered // merging disabled: pass reads through unchanged
+            .mix( ch_longreads_preprocessed )
+    }
+
     /*
         COMBINE READS WITH POSSIBLE DATABASES
     */
 
     // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
-    ch_input_for_profiling = ch_shortreads_filtered
-            .mix( ch_longreads_preprocessed )
+    ch_input_for_profiling = ch_reads_runmerged
             .combine(DB_CHECK.out.dbs)
             .branch {
                 malt:    it[2]['tool'] == 'malt'