From de74e4f727ae5c35b12c3203aa1462be4c985037 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 2 Dec 2022 12:47:14 +0100 Subject: [PATCH 1/5] Fix centrifuge warning, filter out long reads from Brakcen, add some caveat docs to usage.md --- docs/usage.md | 24 ++++++++++++++++++- subworkflows/local/profiling.nf | 10 +++++++- .../local/standardisation_profiles.nf | 10 ++++---- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index a87102c..d21761f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -248,9 +248,31 @@ You can optionally save the FASTQ output of the run merging with the `--save_run ##### Profiling +###### Bracken + +It is unclear whether Bracken is suitable for running long reads, as it makes certain assumptions about read lengths. Furthermore, during testing we found issues where Bracken would fail on the long-read test data. Therefore nf-core/taxprofiler does not run Bracken on data specified as being sequenced with `OXFORD_NANOPORE` in the input samplesheet. If you believe this to be wrong, please contact us on the nf-core slack and we can discuss this. + +###### Centrifuge + +Centrifuge currently does not accept FASTA files as input, therefore no output will be produced for these input files. + +###### DIAMOND + +DIAMOND only allows output of a single format at a time, therefore supplying parameters such as `--diamond_save_reads` will result in only aligned reads in SAM format being produced; no taxonomic profiles will be available. Be aware of this when setting up your pipeline runs, depending on your particular use case. + ###### MALT -nf-core/taxprofiler uses MALT 0.4.1, which is a compatively old version. However it has been found that the most recent version of MALT (0.5.\*), at the time of writing, is broken. [The LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. 
However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should be still be compatible with `malt-run` of 0.4.1. +MALT does not support paired-end reads alignment (unlike other tools), therefore nf-core/taxprofiler aligns these as independent files if read-merging is skipped. If you skip merging, you can sum or average the results of the counts of the pairs. + +Krona can only be run on MALT output if a path to a Krona taxonomy database is supplied to `--krona_taxonomy_directory`. Therefore if you do not supply a Krona taxonomy directory, Krona plots will not be produced for MALT. + +###### MetaPhlAn3 + +MetaPhlAn3 currently does not accept FASTA files as input, therefore no output will be produced for these input files. + +###### mOTUs + +mOTUs currently does not accept FASTA files as input, therefore no output will be produced for these input files. ### Updating the pipeline diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index aeee2a4..5256951 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -145,7 +145,15 @@ workflow PROFILING { if ( params.run_kraken2 && params.run_bracken ) { // Remove files from 'pure' kraken2 runs, so only those aligned against Bracken & kraken2 database are used. def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report - .filter { meta, report -> meta['tool'] == 'bracken' } + .filter { + meta, report -> + if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for long-read data. Skipping Bracken for sample ${meta.id}." + meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE' + + + } + + // If necessary, convert the eight column output to six column output. 
if (params.kraken2_save_minimizers) { diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index cbb0fab..8c73472 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -3,7 +3,7 @@ // include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main' -include { KRAKENTOOLS_COMBINEKREPORTS } from '../../modules/nf-core/krakentools/combinekreports/main' +include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main' include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main' @@ -93,10 +93,10 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } - KRAKENTOOLS_COMBINEKREPORTS ( ch_profiles_for_kraken2 ) - ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt ) - ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS.out.versions ) + KRAKENTOOLS_COMBINEKREPORTS_KRAKEN ( ch_profiles_for_kraken2 ) + ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt ) + ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.versions ) // MetaPhlAn3 From ab42f6b66272240223ee7807ad08c5b8ac9c7b1c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 2 Dec 2022 12:49:00 +0100 Subject: [PATCH 2/5] wteak message --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf 
b/subworkflows/local/profiling.nf index b8eb086..ec15386 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -147,7 +147,7 @@ workflow PROFILING { def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report .filter { meta, report -> - if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for long-read data. Skipping Bracken for sample ${meta.id}." + if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE' From cf8ad9677124c57e8ade33d5d56de719e6eb9e5b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 2 Dec 2022 15:09:08 +0100 Subject: [PATCH 3/5] Update subworkflows/local/profiling.nf --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index ec15386..2454ef3 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -147,8 +147,8 @@ workflow PROFILING { def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report .filter { meta, report -> - if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE' + if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." 
} From 0dccfddc249107e69f3dfdb748d304a0e2aaa676 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sat, 3 Dec 2022 21:38:25 +0100 Subject: [PATCH 4/5] Switch order back --- subworkflows/local/profiling.nf | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 2454ef3..f5c970c 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -147,14 +147,10 @@ workflow PROFILING { def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report .filter { meta, report -> - meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE' if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." - - + meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE' } - - // If necessary, convert the eight column output to six column output. if (params.kraken2_save_minimizers) { ch_kraken2_output = KRAKEN2_STANDARD_REPORT(ch_kraken2_output).report From 0a0dffa37ca581386d17cb9dd4296e18697196f4 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 4 Dec 2022 12:06:33 +0100 Subject: [PATCH 5/5] Try separating MALT --- .github/workflows/ci.yml | 33 +++++++++++++++++++++++++++++++++ conf/test.config | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d179c6e..dad2838 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -134,3 +134,36 @@ jobs: with: command: nextflow run ${GITHUB_WORKSPACE} -profile test_krakenuniq,docker --outdir ./results attempt_limit: 3 + + malt: + name: Test MALT with workflow parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }} + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "21.10.3" + - "latest-everything" + + steps: + 
- name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Show current locale + run: locale + + - name: Set UTF-8 enabled locale + run: | + sudo locale-gen en_US.UTF-8 + sudo update-locale LANG=en_US.UTF-8 + + - name: Run pipeline with test data + uses: Wandalen/wretry.action@v1.0.11 + with: + command: nextflow run ${GITHUB_WORKSPACE} -profile test_nothing,docker --run_malt --outdir ./results + attempt_limit: 3 diff --git a/conf/test.config b/conf/test.config index 898743e..db9f81d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,7 +35,7 @@ params { run_kaiju = true run_kraken2 = true run_bracken = true - run_malt = true + run_malt = false run_metaphlan3 = true run_centrifuge = true run_diamond = true