mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-25 04:19:54 +00:00
Merge branch 'nf-core:dev' into diamond-update
This commit is contained in:
commit
49487593e2
16 changed files with 220 additions and 19 deletions
|
@ -145,7 +145,7 @@ process {
|
|||
publishDir = [
|
||||
path: { "${params.outdir}/filtlong" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.fastq.gz',
|
||||
pattern: '*.{fastq.gz,log}',
|
||||
enabled: params.save_preprocessed_reads
|
||||
]
|
||||
}
|
||||
|
@ -347,6 +347,7 @@ process {
|
|||
}
|
||||
|
||||
withName: CENTRIFUGE_KREPORT {
|
||||
errorStrategy = {task.exitStatus == 255 ? 'ignore' : 'retry'}
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
publishDir = [
|
||||
|
@ -398,6 +399,13 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: MOTUS_MERGE {
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/motus/" },
|
||||
mode: params.publish_dir_mode
|
||||
]
|
||||
}
|
||||
|
||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/pipeline_info" },
|
||||
|
|
|
@ -38,4 +38,5 @@ params {
|
|||
run_centrifuge = false
|
||||
run_diamond = false
|
||||
run_motus = true
|
||||
run_profile_standardisation = true
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
"git_sha": "d2726fcf75063960f06b36d2229a4c0966614108"
|
||||
},
|
||||
"centrifuge/kreport": {
|
||||
"git_sha": "be4ae28c3c95b3c4047a7d9fb4cb0ed749631cea"
|
||||
"git_sha": "734d0db6079a4aa43b6509b207e5d6feb35d4838"
|
||||
},
|
||||
"custom/dumpsoftwareversions": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
|
@ -37,7 +37,7 @@
|
|||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
"filtlong": {
|
||||
"git_sha": "089f761f0bf79c4a486f1df9b6205f650196a2c1"
|
||||
"git_sha": "957cb9b83668075f4af101fc99502908cca487e3"
|
||||
},
|
||||
"gunzip": {
|
||||
"git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
|
||||
|
@ -78,6 +78,9 @@
|
|||
"minimap2/index": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
"motus/merge": {
|
||||
"git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49"
|
||||
},
|
||||
"motus/profile": {
|
||||
"git_sha": "b6ed584443ad68ac41e6975994139454a4f23c18"
|
||||
},
|
||||
|
|
|
@ -8,7 +8,7 @@ process CENTRIFUGE_KREPORT {
|
|||
'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(results)
|
||||
tuple val(meta), path(report)
|
||||
path db
|
||||
|
||||
output:
|
||||
|
@ -23,7 +23,7 @@ process CENTRIFUGE_KREPORT {
|
|||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
"""
|
||||
db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'`
|
||||
centrifuge-kreport -x \$db_name ${results} > ${prefix}.txt
|
||||
centrifuge-kreport -x \$db_name ${report} > ${prefix}.txt
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -15,9 +15,9 @@ input:
|
|||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- results:
|
||||
- report:
|
||||
type: file
|
||||
description: File containing the centrifuge classification results
|
||||
description: File containing the centrifuge classification report
|
||||
pattern: "*.{txt}"
|
||||
|
||||
output:
|
||||
|
|
4
modules/nf-core/modules/filtlong/main.nf
generated
4
modules/nf-core/modules/filtlong/main.nf
generated
|
@ -12,7 +12,8 @@ process FILTLONG {
|
|||
|
||||
output:
|
||||
tuple val(meta), path("*.fastq.gz"), emit: reads
|
||||
path "versions.yml" , emit: versions
|
||||
tuple val(meta), path("*.log") , emit: log
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -27,6 +28,7 @@ process FILTLONG {
|
|||
$short_reads \\
|
||||
$args \\
|
||||
$longreads \\
|
||||
2> ${prefix}.log \\
|
||||
| gzip -n > ${prefix}.fastq.gz
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
5
modules/nf-core/modules/filtlong/meta.yml
generated
5
modules/nf-core/modules/filtlong/meta.yml
generated
|
@ -45,6 +45,11 @@ output:
|
|||
type: file
|
||||
description: Filtered (compressed) fastq file
|
||||
pattern: "*.fastq.gz"
|
||||
- log:
|
||||
type: file
|
||||
description: Standard error logging file containing summary statistics
|
||||
pattern: "*.log"
|
||||
|
||||
authors:
|
||||
- "@d4straub"
|
||||
- "@sofstam"
|
||||
|
|
47
modules/nf-core/modules/motus/merge/main.nf
generated
Normal file
47
modules/nf-core/modules/motus/merge/main.nf
generated
Normal file
|
@ -0,0 +1,47 @@
|
|||
VERSION = '3.0.1'
|
||||
|
||||
//
// MOTUS_MERGE: combine several mOTUs profiles into a single OTU table with
// `motus merge`.
//
// Inputs:
//   input               - profile file(s) to merge, or a directory containing them
//   db                  - mOTUs database directory, passed to `-db`
//   profile_version_yml - versions.yml emitted by the upstream motus/profile run;
//                         the mOTUs version string is copied from this file
//   biom_format         - when true, write a BIOM table (`-B`) instead of text
//
// Outputs:
//   txt      - <prefix>.txt  (only when biom_format is false)
//   biom     - <prefix>.biom (only when biom_format is true)
//   versions - versions.yml
//
process MOTUS_MERGE {
    label 'process_low'

    conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
        'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"

    input:
    path input
    path db // staged explicitly so the database is visible inside the container (Docker otherwise cannot find it)
    path profile_version_yml, stageAs: 'profile_version.yml'
    val biom_format

    output:
    path("*.txt") , optional: true, emit: txt
    path("*.biom"), optional: true, emit: biom
    path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Honour a user-supplied ext.prefix, as the other modules do; the default
    // keeps the historical output name so existing configurations are unaffected.
    def prefix = task.ext.prefix ?: 'motus_merged'
    // Several inputs -> comma-separated `-i` list; a single directory -> `-d`;
    // otherwise a single file passed with `-i`.
    // NOTE(review): `input.size()` is meant as "number of staged items" - confirm
    // how it behaves when exactly one file (not a list) is staged.
    def cmd_input = input.size() > 1 ? "-i ${input.join(',')}" : input.isDirectory() ? "-d ${input}" : "-i ${input}"
    // Exactly one of the two optional outputs is produced, depending on biom_format.
    def output = biom_format ? "-B -o ${prefix}.biom" : "-o ${prefix}.txt"
    """
    motus \\
        merge \\
        -db $db \\
        ${cmd_input} \\
        $args \\
        ${output}

    ## Take version from the mOTUs/profile module output, as cannot reconstruct
    ## version without having database staged in this directory.
    VERSION=\$(cat ${profile_version_yml} | grep '/*motus:.*' | sed 's/.*otus: //g')

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        motus: \$VERSION
    END_VERSIONS
    """
}
|
57
modules/nf-core/modules/motus/merge/meta.yml
generated
Normal file
57
modules/nf-core/modules/motus/merge/meta.yml
generated
Normal file
|
@ -0,0 +1,57 @@
|
|||
name: "motus_merge"
|
||||
description: Taxonomic meta-omics profiling using universal marker genes
|
||||
keywords:
|
||||
- classify
|
||||
- metagenomics
|
||||
- fastq
|
||||
- taxonomic profiling
|
||||
- merging
|
||||
- merge
|
||||
- otu table
|
||||
tools:
|
||||
- "motus":
|
||||
description: "Marker gene-based OTU (mOTU) profiling"
|
||||
homepage: "https://motu-tool.org/"
|
||||
documentation: "https://github.com/motu-tool/mOTUs/wiki"
|
||||
tool_dev_url: "https://github.com/motu-tool/mOTUs"
|
||||
doi: "10.1038/s41467-019-08844-4"
|
||||
licence: "['GPL v3']"
|
||||
|
||||
input:
|
||||
- input:
|
||||
type: file
|
||||
description: |
|
||||
List of output files (more than one) from motus profile,
|
||||
or a single directory containing motus output files.
|
||||
- db:
|
||||
type: directory
|
||||
description: |
|
||||
mOTUs database downloaded by `motus downloadDB`
|
||||
pattern: "db_mOTU/"
|
||||
- profile_version_yml:
|
||||
type: file
|
||||
description: |
|
||||
A single versions.yml file output from motus/profile. motus/merge cannot reconstruct
|
||||
this itself without having the motus database present and configured with the tool
|
||||
so here we take it from what is already reported by the upstream module.
|
||||
pattern: "versions.yml"
|
||||
- biom_format:
|
||||
type: boolean
|
||||
description: Whether to save output OTU table in biom format
|
||||
|
||||
output:
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- txt:
|
||||
type: file
|
||||
description: OTU table in txt format, if BIOM format not requested
|
||||
pattern: "*.txt"
|
||||
- biom:
|
||||
type: file
|
||||
description: OTU table in biom format, if BIOM format requested
|
||||
pattern: "*.biom"
|
||||
|
||||
authors:
|
||||
- "@jfy133"
|
|
@ -132,6 +132,10 @@ params {
|
|||
// krona
|
||||
run_krona = false
|
||||
krona_taxonomy_directory = null
|
||||
|
||||
// profile standardisation
|
||||
run_profile_standardisation = false
|
||||
generate_biom_output = false
|
||||
}
|
||||
|
||||
// Load base.config by default for all pipelines
|
||||
|
|
|
@ -52,7 +52,8 @@
|
|||
"type": "string",
|
||||
"description": "Name of iGenomes reference.",
|
||||
"fa_icon": "fas fa-book",
|
||||
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
|
||||
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
|
||||
"hidden": true
|
||||
},
|
||||
"igenomes_base": {
|
||||
"type": "string",
|
||||
|
@ -444,7 +445,13 @@
|
|||
},
|
||||
"krona_taxonomy_directory": {
|
||||
"type": "string",
|
||||
"default": null
|
||||
"default": "None"
|
||||
},
|
||||
"run_profile_standardisation": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"generate_biom_output": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ workflow LONGREAD_PREPROCESSING {
|
|||
|
||||
ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} )
|
||||
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
||||
|
||||
} else {
|
||||
PORECHOP ( reads )
|
||||
|
@ -45,6 +46,7 @@ workflow LONGREAD_PREPROCESSING {
|
|||
|
||||
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ workflow PROFILING {
|
|||
}
|
||||
|
||||
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log )
|
||||
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
|
||||
ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
|
||||
|
@ -127,7 +127,7 @@ workflow PROFILING {
|
|||
}
|
||||
|
||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report )
|
||||
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
|
||||
ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )
|
||||
|
@ -148,7 +148,7 @@ workflow PROFILING {
|
|||
}
|
||||
|
||||
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
|
||||
CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
|
||||
CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.report, ch_input_for_centrifuge.db)
|
||||
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
|
||||
ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
|
||||
|
@ -185,7 +185,7 @@ workflow PROFILING {
|
|||
|
||||
KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
|
||||
KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
|
||||
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
|
||||
ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
|
||||
|
@ -228,13 +228,13 @@ workflow PROFILING {
|
|||
MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db )
|
||||
ch_versions = ch_versions.mix( MOTUS_PROFILE.out.versions.first() )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log.map{it[1]} )
|
||||
|
||||
ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log )
|
||||
}
|
||||
|
||||
emit:
|
||||
classifications = ch_raw_classifications
|
||||
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
|
||||
versions = ch_versions // channel: [ versions.yml ]
|
||||
motus_version = params.run_motus ? MOTUS_PROFILE.out.versions.first() : Channel.empty()
|
||||
mqc = ch_multiqc_files
|
||||
}
|
||||
|
|
56
subworkflows/local/standardisation_profiles.nf
Normal file
56
subworkflows/local/standardisation_profiles.nf
Normal file
|
@ -0,0 +1,56 @@
|
|||
//
|
||||
// Standardise and merge taxonomic profiles across samples
|
||||
//
|
||||
|
||||
include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main'
|
||||
|
||||
//
// STANDARDISATION_PROFILES: aggregate per-sample profiles into one table per tool.
// Only mOTUs is handled at present; every other tool falls into the `unknown`
// branch and is ignored.
//
// take:
//   classifications - raw classification channel (branched but currently unused)
//   profiles        - tuples whose first element is a meta map with a `tool` key
//                     and whose second element is the profile file
//   databases       - tuples whose first element is a meta map with a `tool` key
//                     and whose second element is the database
//   motu_version    - versions.yml channel from the mOTUs profiling step
//
// emit:
//   tables   - merged table(s), BIOM or text depending on params.generate_biom_output
//   versions - versions.yml file(s) of the modules run here
//
workflow STANDARDISATION_PROFILES {
    take:
    classifications
    profiles
    databases
    motu_version

    main:
    ch_versions            = Channel.empty()
    ch_standardised_tables = Channel.empty()

    // Route each incoming item by the tool recorded in its meta map.
    ch_profiles_by_tool = profiles.branch {
        motus: it[0].tool == 'motus'
        unknown: true
    }

    ch_classifications_by_tool = classifications.branch {
        unknown: true
    }

    ch_databases_by_tool = databases.branch {
        motus: it[0].tool == 'motus'
        unknown: true
    }

    // mOTUs ships a single database and cannot build custom ones, so the
    // database information is dropped here and the merged table is published
    // at the root of the mOTUs results directory.
    ch_motus_profiles = ch_profiles_by_tool.motus.map{ it[1] }.collect()
    ch_motus_database = ch_databases_by_tool.motus.map{ it[1] }

    MOTUS_MERGE ( ch_motus_profiles, ch_motus_database, motu_version, params.generate_biom_output )

    // Only the output matching the requested format is forwarded.
    ch_standardised_tables = ch_standardised_tables.mix (
        params.generate_biom_output ? MOTUS_MERGE.out.biom : MOTUS_MERGE.out.txt
    )
    ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions )

    emit:
    tables = ch_standardised_tables
    versions = ch_versions
}
|
|
@ -78,7 +78,7 @@ workflow VISUALIZATION_KRONA {
|
|||
ch_krona_text_for_import = ch_cleaned_krona_text
|
||||
.map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]}
|
||||
.groupTuple()
|
||||
.dump(tag: "text")
|
||||
|
||||
KRONA_KTIMPORTTEXT( ch_krona_text_for_import )
|
||||
ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html )
|
||||
ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )
|
||||
|
@ -92,7 +92,7 @@ workflow VISUALIZATION_KRONA {
|
|||
ch_krona_taxonomy_for_input = GUNZIP.out.gunzip
|
||||
.map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]}
|
||||
.groupTuple()
|
||||
.dump(tag: "taxonomy")
|
||||
|
||||
KRONA_KTIMPORTTAXONOMY ( ch_krona_taxonomy_for_input, file(params.krona_taxonomy_directory, checkExists: true) )
|
||||
ch_krona_html.mix( KRONA_KTIMPORTTAXONOMY.out.html )
|
||||
ch_versions = ch_versions.mix( MEGAN_RMA2INFO.out.versions.first() )
|
||||
|
|
|
@ -65,6 +65,7 @@ include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_h
|
|||
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
|
||||
include { PROFILING } from '../subworkflows/local/profiling'
|
||||
include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona'
|
||||
include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardisation_profiles'
|
||||
|
||||
/*
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -220,6 +221,14 @@ workflow TAXPROFILER {
|
|||
ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
|
||||
}
|
||||
|
||||
/*
|
||||
SUBWORKFLOW: PROFILING STANDARDISATION
|
||||
*/
|
||||
if ( params.run_profile_standardisation ) {
|
||||
STANDARDISATION_PROFILES ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs, PROFILING.out.motus_version )
|
||||
ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions )
|
||||
}
|
||||
|
||||
/*
|
||||
MODULE: MultiQC
|
||||
*/
|
||||
|
@ -257,7 +266,7 @@ workflow TAXPROFILER {
|
|||
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
|
||||
}
|
||||
|
||||
ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||
|
||||
// TODO create multiQC module for metaphlan
|
||||
MULTIQC (
|
||||
|
|
Loading…
Reference in a new issue