diff --git a/conf/modules.config b/conf/modules.config index df90c38..1558a98 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -398,6 +398,13 @@ process { ] } + withName: MOTUS_MERGE { + publishDir = [ + path: { "${params.outdir}/motus/" }, + mode: params.publish_dir_mode + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/conf/test_motus.config b/conf/test_motus.config index 9d39ad4..1405447 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -38,4 +38,5 @@ params { run_centrifuge = false run_diamond = false run_motus = true + run_profile_standardisation = true } diff --git a/modules.json b/modules.json index faa267d..132ff8b 100644 --- a/modules.json +++ b/modules.json @@ -78,6 +78,9 @@ "minimap2/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "motus/merge": { + "git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49" + }, "motus/profile": { "git_sha": "b6ed584443ad68ac41e6975994139454a4f23c18" }, diff --git a/modules/nf-core/modules/motus/merge/main.nf b/modules/nf-core/modules/motus/merge/main.nf new file mode 100644 index 0000000..01ca5a2 --- /dev/null +++ b/modules/nf-core/modules/motus/merge/main.nf @@ -0,0 +1,47 @@ +VERSION = '3.0.1' + +process MOTUS_MERGE { + label 'process_low' + + conda (params.enable_conda ? "bioconda::motus=3.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0': + 'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }" + + input: + path input + path db // to stop docker saying it can't find it... 
would have to have the module in upstream steps anyway + path profile_version_yml, stageAs: 'profile_version.yml' + val biom_format + + output: + path("*.txt") , optional: true, emit: txt + path("*.biom"), optional: true, emit: biom + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = 'motus_merged' + def cmd_input = input.size() > 1 ? "-i ${input.join(',')}" : input.isDirectory() ? "-d ${input}" : "-i ${input}" + def output = biom_format ? "-B -o ${prefix}.biom" : "-o ${prefix}.txt" + """ + motus \\ + merge \\ + -db $db \\ + ${cmd_input} \\ + $args \\ + ${output} + + ## Take version from the mOTUs/profile module output, as cannot reconstruct + ## version without having database staged in this directory. + VERSION=\$(cat ${profile_version_yml} | grep '/*motus:.*' | sed 's/.*otus: //g') + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + motus: \$VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/motus/merge/meta.yml b/modules/nf-core/modules/motus/merge/meta.yml new file mode 100644 index 0000000..c9c7711 --- /dev/null +++ b/modules/nf-core/modules/motus/merge/meta.yml @@ -0,0 +1,57 @@ +name: "motus_merge" +description: Taxonomic meta-omics profiling using universal marker genes +keywords: + - classify + - metagenomics + - fastq + - taxonomic profiling + - merging + - merge + - otu table +tools: + - "motus": + description: "Marker gene-based OTU (mOTU) profiling" + homepage: "https://motu-tool.org/" + documentation: "https://github.com/motu-tool/mOTUs/wiki" + tool_dev_url: "https://github.com/motu-tool/mOTUs" + doi: "10.1038/s41467-019-08844-4" + licence: "['GPL v3']" + +input: + - input: + type: file + description: | + List of output files (more than one) from motus profile, + or a single directory containing motus output files. 
+ - db: + type: directory + description: | + mOTUs database downloaded by `motus downloadDB` + pattern: "db_mOTU/" + - profile_version_yml: + type: file + description: | + A single versions.yml file output from motus/profile. motus/merge cannot reconstruct + this itself without having the motus database present and configured with the tool + so here we take it from what is already reported by the upstream module. + pattern: "versions.yml" + - biom_format: + type: boolean + description: Whether to save output OTU table in biom format + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - txt: + type: file + description: OTU table in txt format, if BIOM format not requested + pattern: "*.txt" + - biom: + type: file + description: OTU table in biom format, if BIOM format requested + pattern: "*.biom" + +authors: + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 7160d0f..58c9254 100644 --- a/nextflow.config +++ b/nextflow.config @@ -132,6 +132,10 @@ params { // krona run_krona = false krona_taxonomy_directory = null + + // profile standardisation + run_profile_standardisation = false + generate_biom_output = false } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 4eec889..b4cb837 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -52,7 +52,8 @@ "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. 
This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", + "hidden": true }, "igenomes_base": { "type": "string", @@ -444,7 +445,13 @@ }, "krona_taxonomy_directory": { "type": "string", - "default": null + "default": "None" + }, + "run_profile_standardisation": { + "type": "boolean" + }, + "generate_biom_output": { + "type": "boolean" } } } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 38bc9a8..7c2c90c 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -228,12 +228,12 @@ workflow PROFILING { ch_versions = ch_versions.mix( MOTUS_PROFILE.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out ) ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log.map{it[1]} ) - } emit: classifications = ch_raw_classifications profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom versions = ch_versions // channel: [ versions.yml ] + motus_version = params.run_motus ? 
MOTUS_PROFILE.out.versions.first() : Channel.empty() mqc = ch_multiqc_files } diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf new file mode 100644 index 0000000..92ceb16 --- /dev/null +++ b/subworkflows/local/standardisation_profiles.nf @@ -0,0 +1,56 @@ +// +// Standardise and aggregate taxonomic profiles +// + +include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main' + +workflow STANDARDISATION_PROFILES { + take: + classifications + profiles + databases + motu_version + + main: + ch_standardised_tables = Channel.empty() + ch_versions = Channel.empty() + + /* + Split profile results based on tool they come from + */ + ch_input_profiles = profiles + .branch { + motus: it[0]['tool'] == 'motus' + unknown: true + } + + ch_input_classifications = classifications + .branch { + unknown: true + } + + ch_input_databases = databases + .branch { + motus: it[0]['tool'] == 'motus' + unknown: true + } + + /* + Standardise and aggregate + */ + + // mOTUs has a 'single' database, and cannot create custom ones. 
+ // Therefore remove db info here, and publish the merged table at the root + // of the mOTUs results directory + MOTUS_MERGE ( ch_input_profiles.motus.map{it[1]}.collect(), ch_input_databases.motus.map{it[1]}, motu_version, params.generate_biom_output ) + if ( params.generate_biom_output ) { + ch_standardised_tables = ch_standardised_tables.mix ( MOTUS_MERGE.out.biom ) + } else { + ch_standardised_tables = ch_standardised_tables.mix ( MOTUS_MERGE.out.txt ) + } + ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions ) + + emit: + tables = ch_standardised_tables + versions = ch_versions +} diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf index 7a94fc6..397251f 100644 --- a/subworkflows/local/visualization_krona.nf +++ b/subworkflows/local/visualization_krona.nf @@ -78,7 +78,7 @@ workflow VISUALIZATION_KRONA { ch_krona_text_for_import = ch_cleaned_krona_text .map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]} .groupTuple() - .dump(tag: "text") + KRONA_KTIMPORTTEXT( ch_krona_text_for_import ) ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html ) ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() ) @@ -92,7 +92,7 @@ workflow VISUALIZATION_KRONA { ch_krona_taxonomy_for_input = GUNZIP.out.gunzip .map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]} .groupTuple() - .dump(tag: "taxonomy") + KRONA_KTIMPORTTAXONOMY ( ch_krona_taxonomy_for_input, file(params.krona_taxonomy_directory, checkExists: true) ) ch_krona_html.mix( KRONA_KTIMPORTTAXONOMY.out.html ) ch_versions = ch_versions.mix( MEGAN_RMA2INFO.out.versions.first() ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 2037649..85a3a51 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -65,6 +65,7 @@ include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_h include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' include { PROFILING } from 
'../subworkflows/local/profiling' include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona' +include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardisation_profiles' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -220,6 +221,14 @@ workflow TAXPROFILER { ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions ) } + /* + SUBWORKFLOW: PROFILING STANDARDISATION + */ + if ( params.run_profile_standardisation ) { + STANDARDISATION_PROFILES ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs, PROFILING.out.motus_version ) + ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions ) + } + /* MODULE: MultiQC */