From 085d9e519cb91ace783ca33fb1e6ed47a0bff9c9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 24 Jan 2023 11:39:37 +0100 Subject: [PATCH 1/6] [skip ci] add comment about recommendation to split SR/LR --- docs/usage.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 3a4e55c..158cfab 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -64,6 +64,8 @@ ERR3201952,ERR3201952,OXFORD_NANOPORE,///fastq/ERR3201952.fastq.gz,, > ⚠️ Input FASTQ and FASTA files _must_ be gzipped +> ⚠️ While one can include both short-read and long-read data in one run, we recommend that you split these across _two_ pipeline runs and database sheets (see below). This will allow classification optmisation for each data type, and make MultiQC run-reports more readable (due to run statististics having vary large number differences). + | Column | Description | | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Unique sample name [required]. | From 920583127347356b25b5a8a3b14db31100687100 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 24 Jan 2023 11:50:10 +0100 Subject: [PATCH 2/6] Start adding filter for long read and MP3 --- subworkflows/local/profiling.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 6613648..38936eb 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -202,8 +202,11 @@ workflow PROFILING { ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3 .filter{ - if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}." 
- !it[0].is_fasta + meta, report -> + if (meta.is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${meta.id}." + !meta.is_fasta + if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] MetaPhlAn3 has not been evaluated for Nanopore data. Skipping MetaPhlAn3 for sample ${meta.id}." + meta['tool'] == 'metaphlan3' && meta['instrument_platform'] != 'OXFORD_NANOPORE' } .multiMap { it -> From 9cf2d8763f25ef4d9c9e20e7db425a841e593c4e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 24 Jan 2023 11:50:58 +0100 Subject: [PATCH 3/6] [skip ci] Update docs/usage.md Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 158cfab..d37ba60 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -64,7 +64,7 @@ ERR3201952,ERR3201952,OXFORD_NANOPORE,///fastq/ERR3201952.fastq.gz,, > ⚠️ Input FASTQ and FASTA files _must_ be gzipped -> ⚠️ While one can include both short-read and long-read data in one run, we recommend that you split these across _two_ pipeline runs and database sheets (see below). This will allow classification optmisation for each data type, and make MultiQC run-reports more readable (due to run statististics having vary large number differences). +> ⚠️ While one can include both short-read and long-read data in one run, we recommend that you split these across _two_ pipeline runs and database sheets (see below). This will allow classification optimisation for each data type, and make MultiQC run-reports more readable (due to run statistics having very large numerical differences). 
| Column | Description | | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | From 0358489be0f4098b1f2cee1a9c396f5a37cd08e3 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 24 Jan 2023 13:05:52 +0100 Subject: [PATCH 4/6] Remove metaphlan profiling for nanopore as it appears to always result in 100% unclassified --- subworkflows/local/profiling.nf | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 38936eb..f6eddb0 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -202,11 +202,8 @@ workflow PROFILING { ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3 .filter{ - meta, report -> - if (meta.is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${meta.id}." - !meta.is_fasta - if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] MetaPhlAn3 has not been evaluated for Nanopore data. Skipping MetaPhlAn3 for sample ${meta.id}." - meta['tool'] == 'metaphlan3' && meta['instrument_platform'] != 'OXFORD_NANOPORE' + if (it[0].is_fasta || it[0].instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input and/or has not been evaluated for Nanopore data. Skipping MetaPhlAn3 for sample ${it[0].id}." 
+ !(it[0].is_fasta || it[0].instrument_platform == 'OXFORD_NANOPORE') } .multiMap { it -> From 14826d52056b6251f68b50d6bc605f89d8294230 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 24 Jan 2023 13:20:58 +0100 Subject: [PATCH 5/6] Separate conditions, remove some left-over dumps --- subworkflows/local/db_check.nf | 2 +- subworkflows/local/profiling.nf | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index a89b519..96ae7a1 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -40,7 +40,7 @@ workflow DB_CHECK { } //Filter the channel to run untar on DBs of tools actually using - ch_input_untar = ch_dbs_for_untar.untar.dump() + ch_input_untar = ch_dbs_for_untar.untar .filter { params.run_kraken2 && it[0]['tool'] == 'kraken2' || params.run_centrifuge && it[0]['tool'] == 'centrifuge' || params.run_bracken && it[0]['tool'] == 'bracken' || params.run_kaiju && it[0]['tool'] == 'kaiju' || params.run_krakenuniq && it [0]['tool'] == 'krakenuniq' || params.run_malt && it[0]['tool'] == 'malt' || params.run_metaphlan3 && it[0]['tool'] == 'metaphlan3' } UNTAR (ch_input_untar) ch_versions = ch_versions.mix(UNTAR.out.versions.first()) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index f6eddb0..accab37 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -202,8 +202,10 @@ workflow PROFILING { ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3 .filter{ - if (it[0].is_fasta || it[0].instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input and/or has not been evaluated for Nanopore data. Skipping MetaPhlAn3 for sample ${it[0].id}." 
- !(it[0].is_fasta || it[0].instrument_platform == 'OXFORD_NANOPORE') + if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}." + if (it[0].instrument_platform == 'OXFORD_NANOPORE') log.warn "[nf-core/taxprofiler] MetaPhlAn3 has not been evaluated for Nanopore data. Skipping MetaPhlAn3 for sample ${it[0].id}." + // Only the closure's last expression decides the filter, so both exclusions must be combined into it + !it[0].is_fasta && it[0].instrument_platform != 'OXFORD_NANOPORE' } .multiMap { it -> @@ -278,14 +280,13 @@ workflow PROFILING { [[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db] } .groupTuple(by: [0,2,3]) - .dump(tag: "krakenuniq_premultimap") .multiMap { single_meta, reads, db_meta, db -> reads: [ single_meta + db_meta, reads.flatten() ] db: db } // Hardcode to _always_ produce the report file (which is our basic otput, and goes into) - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db.dump(tag: "krakenuniq_db"), params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) From 40f1e7b27aaa41484e4030340528302c398feeb1 Mon Sep 17 00:00:00 2001 From: "Moritz E. 
Beber" Date: Tue, 24 Jan 2023 14:15:51 +0100 Subject: [PATCH 6/6] fix: correct pattern to publish minimap2 index --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 28ede9d..30f057e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -230,7 +230,7 @@ process { path: { "${params.outdir}/minimap2/index" }, mode: params.publish_dir_mode, enabled: params.save_hostremoval_index, - pattern: 'minimap2' + pattern: '*.mmi' ] }