Clean up AdapterRemoval and remove all debugging dumps

2024-11-22 01:26:04 +00:00 · 2022-04-01 21:36:22 +02:00 · 2022-04-01 21:36:22 +02:00 · 93aaa9eeac
commit 93aaa9eeac
parent 03e832954f
5 changed files with 18 additions and 27 deletions
--- a/subworkflows/local/db_check.nf
+++ b/subworkflows/local/db_check.nf
@ -18,9 +18,7 @@ workflow DB_CHECK {
    parsed_samplesheet = DATABASE_CHECK ( dbsheet )
        .csv
        .splitCsv ( header:true, sep:',' )
-        .dump(tag: "db_split_csv_out")
        .map { create_db_channels(it) }
-        .dump(tag: "db_channel_prepped")

    ch_dbs_for_untar = parsed_samplesheet
        .branch {
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@ -12,7 +12,6 @@ workflow INPUT_CHECK {
    parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
        .csv
        .splitCsv ( header:true, sep:',' )
-        .dump(tag: "input_split_csv_out")
        .branch {
            fasta: it['fasta'] != ''
            nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
@ -21,17 +20,14 @@ workflow INPUT_CHECK {

    parsed_samplesheet.fastq
        .map { create_fastq_channel(it) }
-        .dump(tag: "fastq_channel_init")
        .set { fastq }

    parsed_samplesheet.nanopore
        .map { create_fastq_channel(it) }
-        .dump(tag: "fastq_nanopore_channel_init")
        .set { nanopore }

    parsed_samplesheet.fasta
        .map { create_fasta_channel(it) }
-        .dump(tag: "fasta_channel_init")
        .set { fasta }

    emit:
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
    PORECHOP ( reads )

    ch_processed_reads = PORECHOP.out.reads
-                                .dump(tag: "pre_fastqc_check")
                                .map {
                                        meta, reads ->
                                        def meta_new = meta.clone()
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@ -24,9 +24,10 @@ workflow SHORTREAD_ADAPTERREMOVAL {
    ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
    ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )

-    if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
-        // due to the slightly ugly output implementation of the current AdapterRemoval2 module, each file
+    // due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file
    // has to be exported in a separate channel, and we must manually recombine when necessary
+
+    if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
        ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
                                                .mix(
                                                    ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
@ -43,8 +44,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
                                                    }
                                                    .groupTuple()

-
-        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
+                                            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )

    } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
        ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
@ -58,7 +59,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
                                                    }
                                                    .groupTuple(by: 0)

-            ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
+                                            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )

    } else {

--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@ -18,6 +18,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
 if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
+if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot exclude unmerged reads when merging is not turned on. Please specify --shortread_clipmerge_mergepairs" // reject --shortread_clipmerge_excludeunmerged unless pair merging is enabled

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -126,7 +127,6 @@ workflow TAXPROFILER {
    ch_input_for_profiling = ch_shortreads_preprocessed
            .mix( ch_longreads_preprocessed )
            .combine(DB_CHECK.out.dbs)
-            .dump(tag: "reads_plus_db_clean")
            .branch {
                malt:    it[2]['tool'] == 'malt'
                kraken2: it[2]['tool'] == 'kraken2'
@ -141,9 +141,7 @@ workflow TAXPROFILER {
    // loading takes a long time, so we only want to run it once per database
    // TODO document somewhere we only accept illumina short reads for MALT?
    ch_input_for_malt =  ch_input_for_profiling.malt
-                            .dump(tag: "input_to_malt_prefilter")
                            .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
-                            .dump(tag: "input_to_malt_postfilter")
                            .map {
                                it ->
                                    def temp_meta =  [ id: it[2]['db_name']]  + it[2]
@ -151,7 +149,6 @@ workflow TAXPROFILER {
                                    [ temp_meta, it[1], db ]
                            }
                            .groupTuple(by: [0,2])
-                            .dump(tag: "input_to_malt")
                            .multiMap {
                                it ->
                                    reads: [ it[0], it[1].flatten() ]
@ -160,7 +157,6 @@ workflow TAXPROFILER {

    // We can run Kraken2 one-by-one sample-wise
    ch_input_for_kraken2 =  ch_input_for_profiling.kraken2
-                            .dump(tag: "input_to_kraken")
                            .multiMap {
                                it ->
                                    reads: [ it[0] + it[2], it[1] ]