
Merge branch 'nf-core:dev' into diamond-update

Commit 49487593e2 by Mahwash Jamy, 2022-07-28 15:48:59 +02:00 (committed by GitHub).
GPG key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database).
16 changed files with 220 additions and 19 deletions.

conf/modules.config

@@ -145,7 +145,7 @@ process {
        publishDir = [
            path: { "${params.outdir}/filtlong" },
            mode: params.publish_dir_mode,
-           pattern: '*.fastq.gz',
+           pattern: '*.{fastq.gz,log}',
            enabled: params.save_preprocessed_reads
        ]
    }
@@ -347,6 +347,7 @@ process {
    }
    withName: CENTRIFUGE_KREPORT {
        errorStrategy = {task.exitStatus == 255 ? 'ignore' : 'retry'}
+       ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        publishDir = [
@@ -398,6 +399,13 @@ process {
        ]
    }
+   withName: MOTUS_MERGE {
+       publishDir = [
+           path: { "${params.outdir}/motus/" },
+           mode: params.publish_dir_mode
+       ]
+   }
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
        publishDir = [
            path: { "${params.outdir}/pipeline_info" },

conf/test_motus.config

@@ -38,4 +38,5 @@ params {
    run_centrifuge              = false
    run_diamond                 = false
    run_motus                   = true
+   run_profile_standardisation = true
}

modules.json

@@ -22,7 +22,7 @@
            "git_sha": "d2726fcf75063960f06b36d2229a4c0966614108"
        },
        "centrifuge/kreport": {
-           "git_sha": "be4ae28c3c95b3c4047a7d9fb4cb0ed749631cea"
+           "git_sha": "734d0db6079a4aa43b6509b207e5d6feb35d4838"
        },
        "custom/dumpsoftwareversions": {
            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
@@ -37,7 +37,7 @@
            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
        },
        "filtlong": {
-           "git_sha": "089f761f0bf79c4a486f1df9b6205f650196a2c1"
+           "git_sha": "957cb9b83668075f4af101fc99502908cca487e3"
        },
        "gunzip": {
            "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
@@ -78,6 +78,9 @@
        "minimap2/index": {
            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
        },
+       "motus/merge": {
+           "git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49"
+       },
        "motus/profile": {
            "git_sha": "b6ed584443ad68ac41e6975994139454a4f23c18"
        },
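These git_sha pins are normally written by nf-core/tools rather than edited by hand. A sketch of the commands that would produce this set of changes, assuming a recent nf-core/tools release is installed:

    nf-core modules update centrifuge/kreport
    nf-core modules update filtlong
    nf-core modules install motus/merge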

modules/nf-core/modules/centrifuge/kreport/main.nf

@@ -8,7 +8,7 @@ process CENTRIFUGE_KREPORT {
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
-   tuple val(meta), path(results)
+   tuple val(meta), path(report)
    path db

    output:
@@ -23,7 +23,7 @@ process CENTRIFUGE_KREPORT {
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'`
-   centrifuge-kreport -x \$db_name ${results} > ${prefix}.txt
+   centrifuge-kreport -x \$db_name ${report} > ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":

modules/nf-core/modules/centrifuge/kreport/meta.yml

@@ -15,9 +15,9 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - results:
+  - report:
      type: file
-     description: File containing the centrifuge classification results
+     description: File containing the centrifuge classification report
      pattern: "*.{txt}"
output:

modules/nf-core/modules/filtlong/main.nf

@@ -12,6 +12,7 @@ process FILTLONG {
    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
+   tuple val(meta), path("*.log")     , emit: log
    path "versions.yml"                , emit: versions

    when:
@@ -27,6 +28,7 @@ process FILTLONG {
        $short_reads \\
        $args \\
        $longreads \\
+       2> ${prefix}.log \\
        | gzip -n > ${prefix}.fastq.gz

    cat <<-END_VERSIONS > versions.yml
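Filtlong writes the filtered reads to stdout and its summary statistics to stderr, so the added 2> redirection captures the statistics without disturbing the read stream piped into gzip. A standalone sketch of the same pattern (file names and length threshold illustrative):

    # reads -> stdout -> gzip; statistics -> sample.log via stderr
    # gzip -n omits the name/timestamp header, keeping the output reproducible
    filtlong --min_length 1000 sample_raw.fastq.gz 2> sample.log | gzip -n > sample.fastq.gz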

modules/nf-core/modules/filtlong/meta.yml

@@ -45,6 +45,11 @@ output:
      type: file
      description: Filtered (compressed) fastq file
      pattern: "*.fastq.gz"
+  - log:
+      type: file
+      description: Standard error logging file containing summary statistics
+      pattern: "*.log"

authors:
  - "@d4straub"
  - "@sofstam"

modules/nf-core/modules/motus/merge/main.nf

@@ -0,0 +1,47 @@
VERSION = '3.0.1'

process MOTUS_MERGE {
    label 'process_low'

    conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
        'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"

    input:
    path input
    path db // to stop docker saying it can't find it... would have to have the module in upstream steps anyway
    path profile_version_yml, stageAs: 'profile_version.yml'
    val biom_format

    output:
    path("*.txt")       , optional: true, emit: txt
    path("*.biom")      , optional: true, emit: biom
    path "versions.yml" ,                 emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = 'motus_merged'
    def cmd_input = input.size() > 1 ? "-i ${input.join(',')}" : input.isDirectory() ? "-d ${input}" : "-i ${input}"
    def output = biom_format ? "-B -o ${prefix}.biom" : "-o ${prefix}.txt"
    """
    motus \\
        merge \\
        -db $db \\
        ${cmd_input} \\
        $args \\
        ${output}

    ## Take the version from the mOTUs/profile module output, as we cannot
    ## reconstruct it without having the database staged in this directory.
    VERSION=\$(cat ${profile_version_yml} | grep '/*motus:.*' | sed 's/.*otus: //g')

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        motus: \$VERSION
    END_VERSIONS
    """
}
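Note the cmd_input ternary: one or more staged files are passed to motus merge with -i (comma-joined), while a staged directory is passed with -d. The version string is likewise not recomputed here; it is parsed back out of the versions.yml produced by motus/profile. A worked example of that extraction, assuming a typical upstream versions.yml:

    # profile_version.yml, as emitted by motus/profile, looks roughly like:
    #   "<PIPELINE>:...:MOTUS_PROFILE":
    #       motus: 3.0.1
    # grep keeps the "motus:" line; sed deletes everything up to "otus: ":
    grep '/*motus:.*' profile_version.yml | sed 's/.*otus: //g'
    # -> 3.0.1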

modules/nf-core/modules/motus/merge/meta.yml

@@ -0,0 +1,57 @@
name: "motus_merge"
description: Taxonomic meta-omics profiling using universal marker genes
keywords:
  - classify
  - metagenomics
  - fastq
  - taxonomic profiling
  - merging
  - merge
  - otu table

tools:
  - "motus":
      description: "Marker gene-based OTU (mOTU) profiling"
      homepage: "https://motu-tool.org/"
      documentation: "https://github.com/motu-tool/mOTUs/wiki"
      tool_dev_url: "https://github.com/motu-tool/mOTUs"
      doi: "10.1038/s41467-019-08844-4"
      licence: "['GPL v3']"

input:
  - input:
      type: file
      description: |
        List of output files (more than one) from motus profile,
        or a single directory containing motus output files.
  - db:
      type: directory
      description: |
        mOTUs database downloaded by `motus downloadDB`
      pattern: "db_mOTU/"
  - profile_version_yml:
      type: file
      description: |
        A single versions.yml file output from motus/profile. motus/merge cannot
        reconstruct this itself without having the motus database present and
        configured with the tool, so here we take it from what is already
        reported by the upstream module.
      pattern: "versions.yml"
  - biom_format:
      type: boolean
      description: Whether to save output OTU table in biom format

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      type: file
      description: OTU table in txt format, if BIOM format not requested
      pattern: "*.txt"
  - biom:
      type: file
      description: OTU table in biom format, if BIOM format requested
      pattern: "*.biom"

authors:
  - "@jfy133"

nextflow.config

@@ -132,6 +132,10 @@ params {
    // krona
    run_krona                   = false
    krona_taxonomy_directory    = null
+
+   // profile standardisation
+   run_profile_standardisation = false
+   generate_biom_output        = false
}

// Load base.config by default for all pipelines
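Both new parameters default to off, so profile standardisation stays opt-in. A sketch of a launch command that would exercise the new path (sample sheet and database sheet paths hypothetical):

    nextflow run nf-core/taxprofiler -r dev -profile docker \
        --input samplesheet.csv --databases databases.csv \
        --run_motus --run_profile_standardisation --generate_biom_output \
        --outdir results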

nextflow_schema.json

@@ -52,7 +52,7 @@
            "type": "string",
            "description": "Name of iGenomes reference.",
            "fa_icon": "fas fa-book",
-           "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+           "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
+           "hidden": true
        },
        "igenomes_base": {
            "type": "string",
@@ -444,7 +445,13 @@
        },
        "krona_taxonomy_directory": {
            "type": "string",
-           "default": null
+           "default": "None"
        },
+       "run_profile_standardisation": {
+           "type": "boolean"
+       },
+       "generate_biom_output": {
+           "type": "boolean"
+       }
    }
}
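The added "hidden": true follows nf-core schema convention: hidden parameters are omitted from the rendered --help output and parameter documentation unless the pipeline is run with --show_hidden_params. Schematically, a hidden entry looks like this (parameter name hypothetical):

    "my_advanced_param": {
        "type": "string",
        "hidden": true
    }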

subworkflows/local/longread_preprocessing.nf

@@ -30,6 +30,7 @@ workflow LONGREAD_PREPROCESSING {
        ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} )
        ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
+       ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )

    } else {
        PORECHOP ( reads )
@@ -45,6 +46,7 @@ workflow LONGREAD_PREPROCESSING {
        ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
        ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
+       ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
    }

subworkflows/local/profiling.nf

@@ -110,7 +110,7 @@ workflow PROFILING {
        }

        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
-       ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
+       ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log )
        ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
        ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
        ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
@@ -127,7 +127,7 @@ workflow PROFILING {
        }

        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
-       ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) )
+       ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report )
        ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
        ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
        ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )
@@ -148,7 +148,7 @@ workflow PROFILING {
        }

        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
-       CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
+       CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.report, ch_input_for_centrifuge.db)
        ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
        ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )
        ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
@@ -185,7 +185,7 @@ workflow PROFILING {

        KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
        KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
-       ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) )
+       ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
        ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
        ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
@@ -228,13 +228,13 @@ workflow PROFILING {
        MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db )
        ch_versions = ch_versions.mix( MOTUS_PROFILE.out.versions.first() )
        ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out )
-       ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log.map{it[1]} )
+       ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log )
    }

    emit:
    classifications = ch_raw_classifications
    profiles        = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
    versions        = ch_versions // channel: [ versions.yml ]
    motus_version   = params.run_motus ? MOTUS_PROFILE.out.versions.first() : Channel.empty()
    mqc             = ch_multiqc_files
}
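The common pattern across these hunks: the per-tool .collect{it[1]}.ifEmpty([]) calls move out of the subworkflow, so mqc now carries plain [meta, file] tuples and the meta-stripping happens exactly once at the top level (see workflows/taxprofiler.nf below). A runnable sketch of what that top-level call does, with made-up sample IDs:

    workflow {
        Channel.of( [ [id:'s1'], 's1.log' ], [ [id:'s2'], 's2.log' ] )
            .collect { it[1] }   // drop each meta, gather the files into one list
            .ifEmpty( [] )       // emit an empty list if no profiler produced logs
            .view()              // -> [s1.log, s2.log]
    }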

subworkflows/local/standardisation_profiles.nf

@@ -0,0 +1,56 @@
//
// Standardise and aggregate profiles across profilers
//

include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main'

workflow STANDARDISATION_PROFILES {
    take:
    classifications
    profiles
    databases
    motu_version

    main:
    ch_standardised_tables = Channel.empty()
    ch_versions            = Channel.empty()

    /*
        Split profile results based on the tool they come from
    */
    ch_input_profiles = profiles
        .branch {
            motus: it[0]['tool'] == 'motus'
            unknown: true
        }

    ch_input_classifications = classifications
        .branch {
            unknown: true
        }

    ch_input_databases = databases
        .branch {
            motus: it[0]['tool'] == 'motus'
            unknown: true
        }

    /*
        Standardise and aggregate
    */

    // mOTUs has a 'single' database and cannot create custom ones, so the db
    // info is dropped here and the merged table is published at the root of
    // the mOTUs results directory.
    MOTUS_MERGE ( ch_input_profiles.motus.map{it[1]}.collect(), ch_input_databases.motus.map{it[1]}, motu_version, params.generate_biom_output )

    if ( params.generate_biom_output ) {
        ch_standardised_tables = ch_standardised_tables.mix ( MOTUS_MERGE.out.biom )
    } else {
        ch_standardised_tables = ch_standardised_tables.mix ( MOTUS_MERGE.out.txt )
    }
    ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions )

    emit:
    tables   = ch_standardised_tables
    versions = ch_versions
}
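The branch operator used here splits one channel into named sub-channels, testing predicates top to bottom; unknown: true acts as a catch-all, so tuples from other profilers are retained rather than silently dropped. A minimal runnable sketch:

    workflow {
        Channel.of( [ [tool:'motus'], 'a.out' ], [ [tool:'kraken2'], 'b.report' ] )
            .branch {
                motus:   it[0]['tool'] == 'motus'
                unknown: true
            }
            .set { ch_split }

        ch_split.motus.view   { "motus:   $it" }   // [ [tool:motus], a.out ]
        ch_split.unknown.view { "unknown: $it" }   // [ [tool:kraken2], b.report ]
    }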

subworkflows/local/visualization_krona.nf

@@ -78,7 +78,7 @@ workflow VISUALIZATION_KRONA {
        ch_krona_text_for_import = ch_cleaned_krona_text
            .map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]}
            .groupTuple()
-           .dump(tag: "text")

        KRONA_KTIMPORTTEXT( ch_krona_text_for_import )
        ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html )
        ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )
@@ -92,7 +92,7 @@ workflow VISUALIZATION_KRONA {
        ch_krona_taxonomy_for_input = GUNZIP.out.gunzip
            .map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]}
            .groupTuple()
-           .dump(tag: "taxonomy")

        KRONA_KTIMPORTTAXONOMY ( ch_krona_taxonomy_for_input, file(params.krona_taxonomy_directory, checkExists: true) )
        ch_krona_html.mix( KRONA_KTIMPORTTAXONOMY.out.html )
        ch_versions = ch_versions.mix( MEGAN_RMA2INFO.out.versions.first() )
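The removed .dump(tag: ...) calls were debugging aids: dump prints a channel's contents only when the run is launched with -dump-channels, so deleting them is pure cleanup with no runtime effect. For reference, how those channels would have been inspected:

    nextflow run . -dump-channels text,taxonomy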

workflows/taxprofiler.nf

@@ -65,6 +65,7 @@ include { LONGREAD_HOSTREMOVAL          } from '../subworkflows/local/longread_h
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
include { PROFILING                     } from '../subworkflows/local/profiling'
include { VISUALIZATION_KRONA           } from '../subworkflows/local/visualization_krona'
+include { STANDARDISATION_PROFILES      } from '../subworkflows/local/standardisation_profiles'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -220,6 +221,14 @@ workflow TAXPROFILER {
        ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
    }

+   /*
+       SUBWORKFLOW: PROFILING STANDARDISATION
+   */
+   if ( params.run_profile_standardisation ) {
+       STANDARDISATION_PROFILES ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs, PROFILING.out.motus_version )
+       ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions )
+   }
+
    /*
        MODULE: MultiQC
    */
@@ -257,7 +266,7 @@ workflow TAXPROFILER {
        ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
    }

-   ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc )
+   ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) )

    // TODO create multiQC module for metaphlan
    MULTIQC (