From de6a4214ef78a53d0d39f0226a8c0b65afeecd32 Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Thu, 16 Feb 2023 14:29:52 +0100
Subject: [PATCH 01/12] Add taxpasta_merge to taxprofiler

---
 conf/modules.config                           |  15 ++
 conf/test.config                              |   4 +-
 modules.json                                  | 173 +++++++++++++-----
 modules/nf-core/taxpasta/merge/main.nf        |  47 +++++
 modules/nf-core/taxpasta/merge/meta.yml       |  58 ++++++
 nextflow.config                               |  16 +-
 nextflow_schema.json                          |  16 +-
 .../local/standardisation_profiles.nf         |  20 ++
 8 files changed, 302 insertions(+), 47 deletions(-)
 create mode 100644 modules/nf-core/taxpasta/merge/main.nf
 create mode 100644 modules/nf-core/taxpasta/merge/meta.yml

diff --git a/conf/modules.config b/conf/modules.config
index 8ef8728..51b27ad 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -533,6 +533,21 @@ process {
         ]
     }
 
+    withName: TAXPASTA_MERGE {
+        // Only the profiler and the output file name are passed here. The module
+        // appends `--taxonomy <dir>` / `-s <sheet>` itself when those inputs are
+        // staged; a bare "-p" would repeat --profiler and a bare "-s" has no value,
+        // so the taxpasta_add_* params are intentionally not turned into flags.
+        ext.args =  {
+            "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.taxpasta_standardisation_format}"
+        }
+        publishDir = [
+            path: { "${params.outdir}/taxpasta/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{tsv,csv,arrow,parquet,biom}'
+        ]
+    }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
diff --git a/conf/test.config b/conf/test.config
index cfd371a..682d087 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -25,7 +25,7 @@ params {
     perform_shortread_qc                  = true
     perform_longread_qc                   = true
     shortread_qc_mergepairs               = true
-    perform_shortread_complexityfilter    = true
+    perform_shortread_complexityfilter    = false
     perform_shortread_hostremoval         = true
     perform_longread_hostremoval          = true
     perform_runmerging                    = true
@@ -44,7 +44,7 @@ params {
     malt_save_reads                       = true
     kraken2_save_reads                    = true
     centrifuge_save_reads                 = true
-    diamond_save_reads                    = true
+    run_profile_standardisation           = true
 }
 
 process {
diff --git a/modules.json b/modules.json
index 2375da6..9b87f43 100644
--- a/modules.json
+++ b/modules.json
@@ -8,212 +8,301 @@
                     "adapterremoval": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bbmap/bbduk": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bowtie2/align": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bowtie2/build": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bracken/bracken": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "bracken/combinebrackenoutputs": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "centrifuge/centrifuge": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "centrifuge/kreport": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "diamond/blastx": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "falco": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"],
+                        "installed_by": [
+                            "modules"
+                        ],
                         "patch": "modules/nf-core/falco/falco.diff"
                     },
                     "fastp": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "filtlong": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "gunzip": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "kaiju/kaiju": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "kaiju/kaiju2krona": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "kaiju/kaiju2table": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "kraken2/kraken2": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "krakentools/combinekreports": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "krakentools/kreport2krona": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "krakenuniq/preloadedkrakenuniq": {
                         "branch": "master",
                         "git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "krona/ktimporttaxonomy": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "krona/ktimporttext": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "malt/run": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "megan/rma2info": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "metaphlan3/mergemetaphlantables": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "metaphlan3/metaphlan3": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "minimap2/index": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "motus/merge": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "motus/profile": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "porechop/porechop": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"],
+                        "installed_by": [
+                            "modules"
+                        ],
                         "patch": "modules/nf-core/porechop/porechop/porechop-porechop.diff"
                     },
                     "prinseqplusplus": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/bam2fq": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/index": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/stats": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/view": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "taxpasta/merge": {
+                        "branch": "master",
+                        "git_sha": "74ab450ed05e034d049c00f6e2853de2c31594b4",
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "untar": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     }
                 }
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/taxpasta/merge/main.nf b/modules/nf-core/taxpasta/merge/main.nf
new file mode 100644
index 0000000..67a6f25
--- /dev/null
+++ b/modules/nf-core/taxpasta/merge/main.nf
@@ -0,0 +1,47 @@
+process TAXPASTA_MERGE {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::taxpasta=0.1.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/taxpasta:0.1.1--pyhdfd78af_0':
+        'quay.io/biocontainers/taxpasta:0.1.1--pyhdfd78af_0' }"
+
+
+    input:
+    tuple val(meta), path(profiles)
+    path taxonomy
+    path samplesheet
+
+    output:
+    tuple val(meta), path("*.{tsv,csv,arrow,parquet,biom}"), emit: merged_profiles
+    path "versions.yml"                                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // N.B.: Taxpasta requires a --profiler option and will fail without it.
+    // This must be specified via a `nextflow.config` or `modules.config`, for
+    // example, as "--profiler kraken2". Additionally, it requires a --output
+    // option with the output file name. The desired format will be parsed from
+    // the name and should correspond to the output pattern specified above,
+    // e.g., "--output ${task.ext.prefix}.tsv".
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : ''
+    def samplesheet_input = samplesheet ? "-s ${samplesheet}" : ''
+    """
+    taxpasta merge \\
+        $args \\
+        $taxonomy_option \\
+        $samplesheet_input \\
+        $profiles
+
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        taxpasta: \$(taxpasta --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/taxpasta/merge/meta.yml b/modules/nf-core/taxpasta/merge/meta.yml
new file mode 100644
index 0000000..79d301f
--- /dev/null
+++ b/modules/nf-core/taxpasta/merge/meta.yml
@@ -0,0 +1,58 @@
+name: "taxpasta_merge"
+description: Standardise and merge two or more taxonomic profiles into a single table
+keywords:
+  - taxonomic profile
+  - standardise
+  - standardisation
+  - metagenomics
+  - taxonomic profiling
+  - otu tables
+  - taxon tables
+tools:
+  - "taxpasta":
+      description: "TAXonomic Profile Aggregation and STAndardisation"
+      homepage: "https://taxpasta.readthedocs.io/"
+      documentation: "https://taxpasta.readthedocs.io/"
+      tool_dev_url: "https://github.com/taxprofiler/taxpasta"
+      doi: ""
+      licence: "['Apache-2.0']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - profiles:
+      type: file
+      description: A list of taxonomic profiler output files (typically in text format, mandatory)
+      pattern: "*.{tsv,csv,arrow,parquet,biom}"
+  - samplesheet:
+      type: file
+      description:
+        A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative
+        from the work environment. The profiles must be provided even if you give a samplesheet as argument (optional)
+      pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}"
+  - taxonomy:
+      type: directory
+      description: Directory containing at a minimum nodes.dmp and names.dmp files (optional)
+      pattern: "*/"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - merged_profiles:
+      type: file
+      description: Output file with standardised multiple profiles in one go and have all profiles combined into a single table.
+      pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet,biom}"
+
+authors:
+  - "@sofstam"
+  - "@jfy133"
diff --git a/nextflow.config b/nextflow.config
index 45a3cc7..52a72db 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -155,8 +155,11 @@ params {
     krona_taxonomy_directory   = null
 
     // profile standardisation
-    run_profile_standardisation = false
-    generate_biom_output        = false
+    run_profile_standardisation             = false
+    taxpasta_add_taxonomy                   = false
+    taxpasta_add_samplesheet                = false
+    taxpasta_standardisation_format         = 'tsv'
+    generate_biom_output                    = false
 }
 
 // Load base.config by default for all pipelines
@@ -242,6 +245,15 @@ profiles {
         executor.cpus          = 16
         executor.memory        = 60.GB
     }
+    hasta {
+        includeConfig 'conf/hasta.config'
+    }
+    dev_priority {
+        params {
+            priority = 'development'
+            clusterOptions = "--qos=low"
+        }
+     }
     test                    { includeConfig 'conf/test.config'      }
     test_full               { includeConfig 'conf/test_full.config' }
     test_noprofiling        { includeConfig 'conf/test_noprofiling.config' }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 89cad56..a84a232 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -767,5 +767,19 @@
         {
             "$ref": "#/definitions/reference_genome_options"
         }
-    ]
+    ],
+    "properties": {
+        "add_taxonomy": {
+            "type": "string",
+            "default": "false"
+        },
+        "add_samplesheet": {
+            "type": "string",
+            "default": "false"
+        },
+        "standardisation_taxpasta_format": {
+            "type": "string",
+            "default": "tsv"
+        }
+    }
 }
diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf
index 582aaed..0a416a9 100644
--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@@ -8,6 +8,7 @@ include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN
 include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
 include { METAPHLAN3_MERGEMETAPHLANTABLES                                       } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
 include { MOTUS_MERGE                                                           } from '../../modules/nf-core/motus/merge/main'
+include { TAXPASTA_MERGE                                                        } from '../../modules/nf-core/taxpasta/merge/main'
 
 workflow STANDARDISATION_PROFILES {
     take:
@@ -21,6 +22,20 @@ workflow STANDARDISATION_PROFILES {
     ch_versions            = Channel.empty()
     ch_multiqc_files       = Channel.empty()
 
+    // Taxpasta standardisation: one merge per (database, tool) group of profiles
+    ch_input_for_taxpasta = profiles
+                            .map {
+                                meta, profile ->
+                                    def meta_new = [:]
+                                    meta_new.id = meta.db_name
+                                    meta_new.tool = meta.tool == 'metaphlan3' ? 'metaphlan' : meta.tool == 'malt' ? 'megan6' : meta.tool
+                                    [meta_new, profile]
+                                }
+                                .groupTuple ()
+
+    TAXPASTA_MERGE (ch_input_for_taxpasta, [], [])
+    ch_versions = ch_versions.mix( TAXPASTA_MERGE.out.versions )
+
     /*
         Split profile results based on tool they come from
     */
@@ -74,6 +89,8 @@ workflow STANDARDISATION_PROFILES {
                                     [[id:it[0]], it[1]]
                                 }
 
+
+
     KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )
     ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
     ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
@@ -125,6 +142,8 @@ workflow STANDARDISATION_PROFILES {
     ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt )
     ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions )
 
+    ch_standardised_tables.dump (tag: 'standardised')
+
     // mOTUs
 
     // mOTUs has a 'single' database, and cannot create custom ones.
@@ -149,6 +168,7 @@ workflow STANDARDISATION_PROFILES {
 
     emit:
     tables   = ch_standardised_tables
+    taxpasta = TAXPASTA_MERGE.out.merged_profiles
     versions = ch_versions
     mqc      = ch_multiqc_files
 }

From 25d44c955e9d8d4e3050e6ed189710a1f8e23fde Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Thu, 16 Feb 2023 15:16:05 +0100
Subject: [PATCH 02/12] Merge dev into add_taxpasta_merge

---
 nextflow.config | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 52a72db..9d3af4c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -245,15 +245,6 @@ profiles {
         executor.cpus          = 16
         executor.memory        = 60.GB
     }
-    hasta {
-        includeConfig 'conf/hasta.config'
-    }
-    dev_priority {
-        params {
-            priority = 'development'
-            clusterOptions = "--qos=low"
-        }
-     }
     test                    { includeConfig 'conf/test.config'      }
     test_full               { includeConfig 'conf/test_full.config' }
     test_noprofiling        { includeConfig 'conf/test_noprofiling.config' }

From dad64a688dc9a0858d7d38787bc57468f81834d6 Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Thu, 16 Feb 2023 16:33:36 +0100
Subject: [PATCH 03/12] Add documentation for taxpasta module

---
 docs/output.md       |  24 ++++++
 modules.json         | 170 +++++++++++--------------------------------
 nextflow_schema.json |  31 ++++----
 3 files changed, 83 insertions(+), 142 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index be681bd..4ffad3f 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -33,6 +33,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [MALT](#malt) - Sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics
 - [MetaPhlAn3](#metaphlan3) - Genome-level marker gene based taxonomic classifier
 - [mOTUs](#motus) - Tool for marker gene-based OTU (mOTU) profiling.
+- [TAXPASTA](#taxpasta) - Tool to standardise taxonomic profiles as well as merge profiles across samples for the same classifier/profiler.
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
 
@@ -435,6 +436,29 @@ Krona charts will be generated by the pipeline for supported tools (Kraken2, Cen
 
 The resulting HTML files can be loaded into your web browser for exploration. Each file will have a dropdown to allow you to switch between each sample aligned against the given database of the tool.
 
+### TAXPASTA
+
+[TAXPASTA](https://github.com/taxprofiler/taxpasta) is a Python package, added as a module in nf-core/taxprofiler, that can standardise and merge two or more taxonomic profiles across samples into a single table.
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `taxpasta`
+  - `<tool>_<db_name>.{tsv,csv,arrow,parquet,biom}`: The standardised, merged table of all profiles produced by a given tool against a given database. The default format is `tsv`. The first column describes the taxonomy ID and the rest of the columns describe the read counts for each sample.
+</details>
+
+The following report files are used for the taxpasta step:
+
+- Bracken: `<sample>_<db_name>.tsv`
+- Centrifuge: `<sample_id>.centrifuge.txt`
+- Diamond: `<sample_id>`
+- Kaiju: `<sample_id>_<db_name>.kaijutable.txt`
+- KrakenUniq: `<sample_id>_<db_name>.report.txt`
+- Kraken2: `<sample_id>_<db_name>.report.txt`
+- MALT: `<sample_id>.txt.gz`
+- MetaPhlAn3: `<sample_id>_profile.txt`
+- mOTUs: `<sample_id>.out`
+
 ### MultiQC
 
 <details markdown="1">
diff --git a/modules.json b/modules.json
index 9b87f43..5b07b07 100644
--- a/modules.json
+++ b/modules.json
@@ -8,301 +8,217 @@
                     "adapterremoval": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bbmap/bbduk": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bowtie2/align": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bowtie2/build": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bracken/bracken": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "bracken/combinebrackenoutputs": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "centrifuge/centrifuge": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "centrifuge/kreport": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "diamond/blastx": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "falco": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ],
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/falco/falco.diff"
                     },
                     "fastp": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "filtlong": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "gunzip": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "kaiju/kaiju": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "kaiju/kaiju2krona": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "kaiju/kaiju2table": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "kraken2/kraken2": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "krakentools/combinekreports": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "krakentools/kreport2krona": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "krakenuniq/preloadedkrakenuniq": {
                         "branch": "master",
                         "git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "krona/ktimporttaxonomy": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "krona/ktimporttext": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "malt/run": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "megan/rma2info": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "metaphlan3/mergemetaphlantables": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "metaphlan3/metaphlan3": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "minimap2/index": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "motus/merge": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "motus/profile": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "porechop/porechop": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ],
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/porechop/porechop/porechop-porechop.diff"
                     },
                     "prinseqplusplus": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/bam2fq": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/index": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/stats": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/view": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "taxpasta/merge": {
                         "branch": "master",
                         "git_sha": "74ab450ed05e034d049c00f6e2853de2c31594b4",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "untar": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     }
                 }
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a84a232..b17094f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -509,6 +509,21 @@
                     "fa_icon": "fas fa-folder-open",
                     "description": "Specify path to krona taxonomy directories (required for MALT krona plots)",
                     "help_text": "Specify a path to a Krona taxonomy database directory (i.e. a directory containing a krona generated `.tab` file).\n\nThis is only required for generating Krona plots of MALT output.\n\nNote this taxonomy database must be downloaded and generated with the `updateTaxonomy.sh` script from the krona-tools package."
+                },
+                "taxpasta_add_taxonomy": {
+                    "type": "boolean",
+                    "description": "The path to a directory  containing taxdump files.",
+                    "help_text": "At least nodes.dmp and names.dmp are required. A merged.dmp file is optional."
+                },
+                "taxpasta_add_samplesheet": {
+                    "type": "boolean",
+                    "description": "A table with with two columns, one for the sample and one for  the taxonomic profile.",
+                    "help_text": "If this option is provided, any arguments are ignored."
+                },
+                "taxpasta_standardisation_format": {
+                    "type": "string",
+                    "default": "tsv",
+                    "description": "The desired output format."
                 }
             },
             "fa_icon": "fas fa-chart-line"
@@ -767,19 +782,5 @@
         {
             "$ref": "#/definitions/reference_genome_options"
         }
-    ],
-    "properties": {
-        "add_taxonomy": {
-            "type": "string",
-            "default": "false"
-        },
-        "add_samplesheet": {
-            "type": "string",
-            "default": "false"
-        },
-        "standardisation_taxpasta_format": {
-            "type": "string",
-            "default": "tsv"
-        }
-    }
+    ]
 }

From c9b521234bab8db27946181ea114f11bfe3e330d Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Thu, 16 Feb 2023 16:38:00 +0100
Subject: [PATCH 04/12] Prettier for docs

---
 docs/output.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index 4ffad3f..fb1f728 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -33,7 +33,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [MALT](#malt) - Sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics
 - [MetaPhlAn3](#metaphlan3) - Genome-level marker gene based taxonomic classifier
 - [mOTUs](#motus) - Tool for marker gene-based OTU (mOTU) profiling.
-- [TAXPASTA](#taxpasta) -  Tool to standardise taxonomic profiles as well as merge profiles across samples for the same classifier/profiler.
+- [TAXPASTA](#taxpasta) - Tool to standardise taxonomic profiles as well as merge profiles across samples for the same classifier/profiler.
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
 
@@ -445,7 +445,7 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea
 
 - `taxpasta`
   - `<db_name>.<sample_id>*.{tsv,csv,arrow,parquet,biom}`: A list of taxonomic profiler output files. The standard format is the `tsv`. The first column describes the taxonomy ID and the rest of the columns describe the read counts for each sample.
-</details>
+  </details>
 
 The following report files are used for the taxpasta step:
 

From 27b7171b96703129e1b2357db2f4d87febfab5a6 Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Fri, 17 Feb 2023 09:16:58 +0100
Subject: [PATCH 05/12] Apply review suggestions

---
 conf/modules.config                            | 2 +-
 conf/test.config                               | 2 +-
 docs/output.md                                 | 4 ++--
 nextflow_schema.json                           | 3 ++-
 subworkflows/local/standardisation_profiles.nf | 2 --
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 6aaa379..4a1fba2 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -548,7 +548,7 @@ process {
                 "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.taxpasta_standardisation_format}",
                 params.taxpasta_add_taxonomy ? "-p" : "",
                 params.taxpasta_add_samplesheet ? "-s" :""
-            ].join(',').replaceAll(','," ")
+            ].join(' ').trim()
             }
         publishDir = [
             path: { "${params.outdir}/taxpasta/" },
diff --git a/conf/test.config b/conf/test.config
index 682d087..925987e 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -33,7 +33,7 @@ params {
     run_kaiju                             = true
     run_kraken2                           = true
     run_bracken                           = true
-    run_malt                              = false
+    run_malt                              = true
     run_metaphlan3                        = true
     run_centrifuge                        = true
     run_diamond                           = true
diff --git a/docs/output.md b/docs/output.md
index fb1f728..3da9b22 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -33,7 +33,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [MALT](#malt) - Sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics
 - [MetaPhlAn3](#metaphlan3) - Genome-level marker gene based taxonomic classifier
 - [mOTUs](#motus) - Tool for marker gene-based OTU (mOTU) profiling.
-- [TAXPASTA](#taxpasta) - Tool to standardise taxonomic profiles as well as merge profiles across samples for the same classifier/profiler.
+- [TAXPASTA](#taxpasta) - Tool to standardise taxonomic profiles as well as merge profiles across samples from the same database and classifier/profiler.
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
 
@@ -438,7 +438,7 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea
 
 ### TAXPASTA
 
-[TAXPASTA](https://github.com/taxprofiler/taxpasta) is a python package added as a module in nf-core/taxprofiler that can standardise and merge two or more taxonomic profiles across samples into one single table..
+[TAXPASTA](https://github.com/taxprofiler/taxpasta) standardises and merges two or more taxonomic profiles across samples into one single table. It supports multiple different classifiers, simplifying the comparison of taxonomic classification results between tools and databases.
 
 <details markdown="1">
 <summary>Output files</summary>
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b17094f..a6eb0a8 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -523,7 +523,8 @@
                 "taxpasta_standardisation_format": {
                     "type": "string",
                     "default": "tsv",
-                    "description": "The desired output format."
+                    "description": "The desired output format.",
+                    "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
                 }
             },
             "fa_icon": "fas fa-chart-line"
diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf
index 0a416a9..d928263 100644
--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@@ -90,7 +90,6 @@ workflow STANDARDISATION_PROFILES {
                                 }
 
 
-
     KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )
     ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
     ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
@@ -142,7 +141,6 @@ workflow STANDARDISATION_PROFILES {
     ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt )
     ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions )
 
-    ch_standardised_tables.dump (tag: 'standardised')
 
     // mOTUs
 

From 5ebc22dd29b992495fcaf42511b1038f84d6e78a Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Fri, 17 Feb 2023 12:58:33 +0100
Subject: [PATCH 06/12] Add icon in nextflow_schema.json

---
 nextflow_schema.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index a6eb0a8..80ba987 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -524,7 +524,8 @@
                     "type": "string",
                     "default": "tsv",
                     "description": "The desired output format.",
-                    "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
+                    "enum": ["tsv", "csv", "arrow", "parquet", "biom"],
+                    "fa_icon": "fas fa-file"
                 }
             },
             "fa_icon": "fas fa-chart-line"

From 08bfa6a1a73970bf986cf2be312565d029cbbb6a Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Fri, 17 Feb 2023 13:43:38 +0100
Subject: [PATCH 07/12] Apply review suggestions for docs

---
 docs/output.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index 3da9b22..284e55b 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -449,15 +449,15 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea
 
 The following report files are used for the taxpasta step:
 
-- Bracken: `<sample>_<db_name>.tsv`
-- Centrifuge: `<sample_id>.centrifuge.txt`
-- Diamond: `<sample_id>`
-- Kaiju: `<sample_id>_<db_name>.kaijutable.txt`
-- KrakenUniq: `<sample_id>_<db_name>.report.txt`
-- Kraken2: `<sample_id>_<db_name>.report.txt`
-- MALT: `<sample_id>.txt.gz`
-- MetaPhlAn3: `<sample_id>_profile.txt`
-- mOTUs: `<sample_id>.out`
+- Bracken: `<sample>_<db_name>.tsv` Taxpasta used the `new_est_reads` column for the standardised profile.
+- Centrifuge: `<sample_id>.centrifuge.txt` Taxpasta uses the `direct_assigned_reads` column for the standardised profile.
+- Diamond: `<sample_id>` Taxpasta summarises the number of reads per NCBI taxonomy ID for the standardised profile.
+- Kaiju: `<sample_id>_<db_name>.kaijutable.txt` Taxpasta uses the `reads` column from kaiju2table for the standardised profile.
+- KrakenUniq: `<sample_id>_<db_name>.report.txt` Taxpasta uses the `reads` column for the standardised profile.
+- Kraken2: `<sample_id>_<db_name>.report.txt` Taxpasta uses the `direct_assigned_reads` column for the standardised profile.
+- MALT: `<sample_id>.txt.gz` Taxpasta uses the `count` (second) column from the output of MEGAN6's rma2info for the standardised profile.
+- MetaPhlAn3: `<sample_id>_profile.txt` Taxpasta uses the `relative_abundance` column multiplied with a fixed number to yield an integer for the standardised profile.
+- mOTUs: `<sample_id>.out` Taxpasta uses the `read_count` column for the standardised profile.
 
 ### MultiQC
 

From 604df56fdbf80bf6e079b75c4e2a1b4901ebd9af Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Fri, 17 Feb 2023 13:49:13 +0100
Subject: [PATCH 08/12] Add a description about the files used by taxpasta

---
 docs/output.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index 284e55b..9a7518d 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -444,9 +444,13 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea
 <summary>Output files</summary>
 
 - `taxpasta`
-  - `<db_name>.<sample_id>*.{tsv,csv,arrow,parquet,biom}`: A list of taxonomic profiler output files. The standard format is the `tsv`. The first column describes the taxonomy ID and the rest of the columns describe the read counts for each sample.
+
+  - `<tool>_<database>*.{tsv,csv,arrow,parquet,biom}`: Standardised taxon table containing multiple samples. The default format is `tsv`. The first column describes the taxonomy ID and the rest of the columns describe the read counts for each sample.
+
   </details>
 
+These files will likely be the most useful files for the comparison of differences in classification between different tools or building consensuses, with the caveat they have slightly less information than the actual output from each tool (which may have non-standard information e.g. taxonomic rank, percentage of hits, abundance estimations).
+
 The following report files are used for the taxpasta step:
 
 - Bracken: `<sample>_<db_name>.tsv` Taxpasta used the `new_est_reads` column for the standardised profile.

From f0aa89cb877513034e7202f6a977f5f79a86f0df Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Fri, 17 Feb 2023 14:47:29 +0100
Subject: [PATCH 09/12] Rename taxpasta parameters and remove samplesheet
 parameter

---
 conf/modules.config  |  7 +++----
 nextflow.config      |  7 +++----
 nextflow_schema.json | 25 ++++++++++---------------
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 4a1fba2..0bab02b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -534,7 +534,7 @@ process {
     }
 
     withName: MOTUS_MERGE {
-        ext.args = { params.generate_biom_output ? "-B" : "" }
+        ext.args = { params.standardisation_motus_generatebiom ? "-B" : "" }
         ext.prefix = { "motus_${meta.id}_combined_reports" }
         publishDir = [
             path: { "${params.outdir}/motus/" },
@@ -545,9 +545,8 @@ process {
     withName: TAXPASTA_MERGE {
         ext.args =  {
             [
-                "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.taxpasta_standardisation_format}",
-                params.taxpasta_add_taxonomy ? "-p" : "",
-                params.taxpasta_add_samplesheet ? "-s" :""
+                "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
+                params.standardisation_taxpasta_addtaxonomy ? "-p" : ""
             ].join(' ').trim()
             }
         publishDir = [
diff --git a/nextflow.config b/nextflow.config
index 9d3af4c..34e8b00 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -156,10 +156,9 @@ params {
 
     // profile standardisation
     run_profile_standardisation             = false
-    taxpasta_add_taxonomy                   = false
-    taxpasta_add_samplesheet                = false
-    taxpasta_standardisation_format         = 'tsv'
-    generate_biom_output                    = false
+    standardisation_taxpasta_addtaxonomy    = false
+    standardisation_taxpasta_format         = 'tsv'
+    standardisation_motus_generatebiom      = false
 }
 
 // Load base.config by default for all pipelines
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 80ba987..a7a1b4d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -491,11 +491,11 @@
                     "description": "Turn on standardisation of taxon tables across profilers",
                     "help_text": "Turns on standardisation of output OTU tables across all tools; each into a TSV format following the following scheme:\n\n|TAXON   | SAMPLE_A | SAMPLE_B |\n|-------------|----------------|-----------------|\n| taxon_a | 32               | 123             |\n| taxon_b | 1                 | 5                 |\n\nThis currently only is generated for mOTUs."
                 },
-                "generate_biom_output": {
+                "standardisation_motus_generatebiom": {
                     "type": "boolean",
                     "fa_icon": "fas fa-toggle-on",
                     "description": "Turn on generation of BIOM output (currently only applies to mOTUs)",
-                    "help_text": "Turn on the saving of the taxonomic output in BIOM format (`.biom`) in the results directory of your pipeline run, instead of the default TSV format.\n\nNote this file is from the output of the `motus merge` command.\n\n> Modifies tool parameter(s):\n> - `-B -o`"
+                    "help_text": "Turn on the saving of the taxonomic output in BIOM format (`.biom`) in the results directory of your pipeline run, instead of the default TSV format.\n\nNote this file is from the output of the `motus merge` command.\n\n> Modifies tool parameter(s):\n> - `-B -o`"
                 },
                 "run_krona": {
                     "type": "boolean",
@@ -510,22 +510,17 @@
                     "description": "Specify path to krona taxonomy directories (required for MALT krona plots)",
                     "help_text": "Specify a path to a Krona taxonomy database directory (i.e. a directory containing a krona generated `.tab` file).\n\nThis is only required for generating Krona plots of MALT output.\n\nNote this taxonomy database must be downloaded and generated with the `updateTaxonomy.sh` script from the krona-tools package."
                 },
-                "taxpasta_add_taxonomy": {
-                    "type": "boolean",
-                    "description": "The path to a directory  containing taxdump files.",
-                    "help_text": "At least nodes.dmp and names.dmp are required. A merged.dmp file is optional."
-                },
-                "taxpasta_add_samplesheet": {
-                    "type": "boolean",
-                    "description": "A table with with two columns, one for the sample and one for  the taxonomic profile.",
-                    "help_text": "If this option is provided, any arguments are ignored."
-                },
-                "taxpasta_standardisation_format": {
+                "standardisation_taxpasta_format": {
                     "type": "string",
                     "default": "tsv",
+                    "fa_icon": "fas fa-file",
                     "description": "The desired output format.",
-                    "enum": ["tsv", "csv", "arrow", "parquet", "biom"],
-                    "fa_icon": "fas fa-file"
+                    "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
+                },
+                "standardisation_taxpasta_addtaxonomy": {
+                    "type": "boolean",
+                    "description": "The path to a directory containing taxdump files, typically from NCBI.",
+                    "help_text": "At least nodes.dmp and names.dmp are required. A merged.dmp file is optional."
                 }
             },
             "fa_icon": "fas fa-chart-line"

From 90a5668007b84a5ece6fa85b2385514c4cd7790d Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Fri, 17 Feb 2023 15:38:36 +0100
Subject: [PATCH 10/12] run_malt false in test and add text in output.md

---
 conf/test.config                               | 4 ++--
 docs/output.md                                 | 2 ++
 subworkflows/local/standardisation_profiles.nf | 1 -
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 925987e..c80533a 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -25,7 +25,7 @@ params {
     perform_shortread_qc                  = true
     perform_longread_qc                   = true
     shortread_qc_mergepairs               = true
-    perform_shortread_complexityfilter    = false
+    perform_shortread_complexityfilter    = true
     perform_shortread_hostremoval         = true
     perform_longread_hostremoval          = true
     perform_runmerging                    = true
@@ -33,7 +33,7 @@ params {
     run_kaiju                             = true
     run_kraken2                           = true
     run_bracken                           = true
-    run_malt                              = true
+    run_malt                              = false
     run_metaphlan3                        = true
     run_centrifuge                        = true
     run_diamond                           = true
diff --git a/docs/output.md b/docs/output.md
index 9a7518d..1e39576 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -463,6 +463,8 @@ The following report files are used for the taxpasta step:
 - MetaPhlAn3: `<sample_id>_profile.txt` Taxpasta uses the `relative_abundance` column multiplied with a fixed number to yield an integer for the standardised profile.
 - mOTUs: `<sample_id>.out` Taxpasta uses the `read_count` column for the standardised profile.
 
+Please aware the outputs of each tool's standardised profile _may not_ be directly comparable between each tool. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above, for which information is used for each tool.
+
 ### MultiQC
 
 <details markdown="1">
diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf
index d928263..de58622 100644
--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@@ -141,7 +141,6 @@ workflow STANDARDISATION_PROFILES {
     ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt )
     ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions )
 
-
     // mOTUs
 
     // mOTUs has a 'single' database, and cannot create custom ones.

From 211b5f3a6802f41d47e23c4840eaf116299a367c Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Mon, 20 Feb 2023 11:49:31 +0100
Subject: [PATCH 11/12] Apply review suggestions

---
 docs/output.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index 1e39576..85b163c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -438,7 +438,7 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea
 
 ### TAXPASTA
 
-[TAXPASTA](https://github.com/taxprofiler/taxpasta) that standardises and merge two or more taxonomic profiles across samples into one single table. It supports multiple different classifiers simplifying taxonomic classification results between tools and databases.
+[TAXPASTA](https://github.com/taxprofiler/taxpasta) standardises and merges two or more taxonomic profiles across samples into one single table. It supports multiple different classifiers simplifying comparison of taxonomic classification results between tools and databases.
 
 <details markdown="1">
 <summary>Output files</summary>
@@ -463,7 +463,7 @@ The following report files are used for the taxpasta step:
 - MetaPhlAn3: `<sample_id>_profile.txt` Taxpasta uses the `relative_abundance` column multiplied with a fixed number to yield an integer for the standardised profile.
 - mOTUs: `<sample_id>.out` Taxpasta uses the `read_count` column for the standardised profile.
 
-Please aware the outputs of each tool's standardised profile _may not_ be directly comparable between each tool. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above, for which information is used for each tool.
+> ⚠️ Please be aware that the outputs of each tool's standardised profile _may not_ be directly comparable between each tool. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above for which information is used for each tool.
 
 ### MultiQC
 

From 04bc978ef67af53b36dff02308893c1525f7f47f Mon Sep 17 00:00:00 2001
From: sofstam <sofia.stamouli@scilifelab.se>
Date: Mon, 20 Feb 2023 11:56:06 +0100
Subject: [PATCH 12/12] Remove taxonomy parameter from taxpasta

---
 conf/modules.config  | 7 +------
 nextflow.config      | 1 -
 nextflow_schema.json | 5 -----
 3 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 0bab02b..605a7c5 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -543,12 +543,7 @@ process {
     }
 
     withName: TAXPASTA_MERGE {
-        ext.args =  {
-            [
-                "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
-                params.standardisation_taxpasta_addtaxonomy ? "-p" : ""
-            ].join(' ').trim()
-            }
+        ext.args =  { "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}" }
         publishDir = [
             path: { "${params.outdir}/taxpasta/" },
             mode: params.publish_dir_mode,
diff --git a/nextflow.config b/nextflow.config
index 34e8b00..9228d67 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -156,7 +156,6 @@ params {
 
     // profile standardisation
     run_profile_standardisation             = false
-    standardisation_taxpasta_addtaxonomy    = false
     standardisation_taxpasta_format         = 'tsv'
     standardisation_motus_generatebiom      = false
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a7a1b4d..5871862 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -516,11 +516,6 @@
                     "fa_icon": "fas fa-file",
                     "description": "The desired output format.",
                     "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
-                },
-                "standardisation_taxpasta_addtaxonomy": {
-                    "type": "boolean",
-                    "description": "The path to a directory containing taxdump files, typically from NCBI.",
-                    "help_text": "At least nodes.dmp and names.dmp are required. A merged.dmp file is optional."
                 }
             },
             "fa_icon": "fas fa-chart-line"