From 5c549e5ce3e080b88383377a1d13d2083311accc Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Thu, 2 Mar 2023 13:49:03 +0100
Subject: [PATCH] Remove TODOs and references to under development

---
 .github/workflows/awsfulltest.yml |  1 -
 CITATIONS.md                      | 46 ++++++++++++++++++++++---------
 README.md                         |  8 +-----
 conf/base.config                  |  2 --
 docs/output.md                    |  2 --
 nextflow.config                   |  1 -
 workflows/taxprofiler.nf          |  3 +-
 7 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index fe81d27..a8974a0 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -15,7 +15,6 @@ jobs:
     steps:
       - name: Launch workflow via tower
         uses: nf-core/tower-action@v3
-        # TODO nf-core: You can customise AWS full pipeline tests as required
         # Add full size test data (but still relatively small datasets for few samples)
         # on the `test_full.config` test runs with only one set of parameters
         with:
diff --git a/CITATIONS.md b/CITATIONS.md
index 2f75fdb..2ed0a2b 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -16,6 +16,10 @@
 
   > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
 
+- [falco](https://doi.org/10.12688/f1000research.21142.2)
+
+  > de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874. doi: 10.12688/f1000research.21142.2
+
 - [fastp](https://doi.org/10.1093/bioinformatics/bty560)
 
   > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
@@ -26,12 +30,30 @@
 
 - [Porechop](https://github.com/rrwick/Porechop)
 
+- [FILTLONG](https://github.com/rrwick/Filtlong)
+
 - [BBTools](http://sourceforge.net/projects/bbmap/)
 
 - [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
 
   > Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
 
+- [Bowtie2](https://doi.org/10.1038/nmeth.1923)
+
+  > Langmead, B., & Salzberg, S. L. (2012). Fast gapped-read alignment with Bowtie 2. Nature Methods, 9(4), 357–359. doi: 10.1038/nmeth.1923
+
+- [minimap2](https://doi.org/10.1093/bioinformatics/bty191)
+
+  > Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34(18), 3094–3100. doi: 10.1093/bioinformatics/bty191
+
+- [SAMTools](https://doi.org/10.1093/gigascience/giab008)
+
+  > Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2). doi: 10.1093/gigascience/giab008
+
+- [Bracken](https://doi.org/10.7717/peerj-cs.104)
+
+  > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
+
 - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
 
   > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
@@ -40,13 +62,9 @@
 
   > Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0
 
-- [Bracken](https://doi.org/10.7717/peerj-cs.104)
+- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
 
-  > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
-
-- [Krona](https://doi.org/10.1186/1471-2105-12-385)
-
-  > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
+  > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
 
 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)
 
@@ -56,23 +74,25 @@
 
   > Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
 
-- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
+- [DIAMOND](https://doi.org/10.1038/nmeth.3176)
 
-  > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
+  > Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
 
 - [Centrifuge](https://doi.org/10.1101/gr.210641.116)
 
   > Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
 
-- [DIAMOND](https://doi.org/10.1038/nmeth.3176)
+- [Kaiju](https://doi.org/10.1038/ncomms11257)
 
-> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
+  > Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. doi: 10.1038/ncomms11257
 
-- [FILTLONG](https://github.com/rrwick/Filtlong)
+- [mOTUs](https://doi.org/10.1186/s40168-022-01410-z)
 
-- [falco](https://doi.org/10.12688/f1000research.21142.2)
+  > Ruscheweyh, H.-J., Milanese, A., Paoli, L., Karcher, N., Clayssen, Q., Keller, M. I., Wirbel, J., Bork, P., Mende, D. R., Zeller, G., & Sunagawa, S. (2022). Cultivation-independent genomes greatly expand taxonomic-profiling capabilities of mOTUs across various environments. Microbiome, 10(1), 212. doi: 10.1186/s40168-022-01410-z
 
-> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
+- [Krona](https://doi.org/10.1186/1471-2105-12-385)
+
+  > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
 
 ## Software packaging/containerisation tools
 
diff --git a/README.md b/README.md
index 9c38e9b..1e43ab8 100644
--- a/README.md
+++ b/README.md
@@ -12,8 +12,6 @@
 
 ## Introduction
 
-> ⚠️ This pipeline is still under development! While the pipeline is usable, not all functionality will be available!
-
 **nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic classification and profiling of shotgun metagenomic data. It allows for in-parallel taxonomic identification of reads or taxonomic abundance estimation with multiple classification and profiling tools against multiple databases, produces standardised output tables.
 
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
@@ -24,8 +22,6 @@ The nf-core/taxprofiler CI test dataset uses sequencing data from [Maixer et al.
 
 ## Pipeline summary
 
-<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
-
 ![](docs/images/taxprofiler_tube.png)
 
 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
@@ -46,7 +42,7 @@ The nf-core/taxprofiler CI test dataset uses sequencing data from [Maixer et al.
    - [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)
 5. Perform optional post-processing with:
    - [bracken](https://ccb.jhu.edu/software/bracken/)
-6. Standardises output tables
+6. Standardise output tables ([`Taxpasta`](https://taxpasta.readthedocs.io))
 7. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
 8. Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))
 
@@ -98,8 +94,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 <!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
 <!-- If you use  nf-core/taxprofiler for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
 
-<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->
-
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
 
 You can cite the `nf-core` publication as follows:
diff --git a/conf/base.config b/conf/base.config
index 7470db1..2f63669 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,7 +10,6 @@
 
 process {
 
-    // TODO nf-core: Check the defaults for all processes
     cpus   = { check_max( 1    * task.attempt, 'cpus'   ) }
     memory = { check_max( 6.GB * task.attempt, 'memory' ) }
     time   = { check_max( 4.h  * task.attempt, 'time'   ) }
@@ -24,7 +23,6 @@ process {
     //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
     //        If possible, it would be nice to keep the same label naming convention when
     //        adding in your local modules too.
-    // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
         cpus   = { check_max( 1                  , 'cpus'    ) }
diff --git a/docs/output.md b/docs/output.md
index 0d35bdd..cbb04f8 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a
 
 The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
 
-<!-- TODO nf-core: Write this documentation describing your workflow's output -->
-
 ## Pipeline overview
 
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
diff --git a/nextflow.config b/nextflow.config
index 19ca198..885c439 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -9,7 +9,6 @@
 // Global default params, used in configs
 params {
 
-    // TODO nf-core: Specify your pipeline's command line flags
     // Input options
     input                      = null
 
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 12a41f3..e44015f 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -9,13 +9,13 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
 // Validate input parameters
 WorkflowTaxprofiler.initialise(params, log)
 
-// TODO nf-core: Add all file path parameters for the pipeline to the list below
 // Check input path parameters to see if they exist
 def checkPathParamList = [ params.input, params.genome, params.databases,
                             params.outdir, params.longread_hostremoval_index,
                             params.hostremoval_reference, params.shortread_hostremoval_index,
                             params.multiqc_config, params.shortread_qc_adapterlist,
                             params.krona_taxonomy_directory,
+                            params.taxpasta_taxonomy_dir,
                             params.multiqc_logo, params.multiqc_methods_description
                         ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
@@ -301,7 +301,6 @@ workflow TAXPROFILER {
         ch_multiqc_files = ch_multiqc_files.mix( STANDARDISATION_PROFILES.out.mqc.collect{it[1]}.ifEmpty([]) )
     }
 
-    // TODO create multiQC module for metaphlan
     MULTIQC (
         ch_multiqc_files.collect(),
         ch_multiqc_config.toList(),