Filter out long reads for MALT, bump CPUs for FastQC so that minigut passes

2024-11-22 01:06:04 +00:00 · 2022-03-28 18:20:10 +02:00 · 2022-03-28 18:20:10 +02:00 · 94e5cfef4a
commit 94e5cfef4a
parent eada201eb2
4 changed files with 22 additions and 3 deletions
--- a/conf/base.config
+++ b/conf/base.config
@@ -27,7 +27,7 @@ process {
    // TODO nf-core: Customise requirements for specific processes.
    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
    withLabel:process_low {
-        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
+        cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
        memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
        time   = { check_max( 4.h   * task.attempt, 'time'    ) }
    }
--- a/conf/test.config
+++ b/conf/test.config
@@ -15,7 +15,7 @@ params {
    config_profile_description = 'Minimal test dataset to check pipeline function'

    // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
+    max_cpus   = 8
    max_memory = '6.GB'
    max_time   = '6.h'

@@ -29,3 +29,16 @@ params {
    shortread_clipmerge = true

 }
+
+process {
+    withName: FASTQC {
+        cpus   = { check_max( 8     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 6.h   * task.attempt, 'time'    ) }
+    }
+    withName: FASTQC_PROCESSED {
+        cpus   = { check_max( 8     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 6.h   * task.attempt, 'time'    ) }
+    }
+}
--- a/subworkflows/local/db_check.nf
+++ b/subworkflows/local/db_check.nf
@@ -14,7 +14,7 @@ workflow DB_CHECK {
    // TODO: make database sheet check
    // Checks:
    // 1) no duplicates,
-    // 2) dbs with no special arguments does not have quotes, e.g. just `,,` and NOT `,"",`
+    // 2) args are not wrapped in quotes; an empty args field is just `,,` and NOT `,"",`
    parsed_samplesheet = DATABASE_CHECK ( dbsheet )
        .csv
        .splitCsv ( header:true, sep:',' )
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -90,9 +90,11 @@ workflow TAXPROFILER {
        MODULE: Run FastQC
    */
    ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
+
    FASTQC (
        ch_input_for_fastqc
    )
+
    ch_versions = ch_versions.mix(FASTQC.out.versions.first())

    CUSTOM_DUMPSOFTWAREVERSIONS (
@@ -137,7 +139,11 @@ workflow TAXPROFILER {

    // We groupTuple to have all samples in one channel for MALT as database
    // loading takes a long time, so we only want to run it once per database
+    // TODO: document that MALT only receives reads whose instrument_platform is
+    // 'ILLUMINA'; reads from any other platform are dropped by the filter below.
    ch_input_for_malt =  ch_input_for_profiling.malt
+                            .dump(tag: "input_to_malt_prefilter")
+                            .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
+                            .dump(tag: "input_to_malt_postfilter")
                            .map {
                                it ->
                                    def temp_meta =  [ id: it[2]['db_name']]  + it[2]