2022-04-10 04:43:30 +00:00
//
// Run profiling
//
include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main'
2022-04-16 05:42:30 +00:00
include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main'
2022-04-10 04:43:30 +00:00
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
2022-04-22 13:24:10 +00:00
include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/modules/centrifuge/kreport/main'
2022-04-10 04:43:30 +00:00
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
2022-04-13 16:51:56 +00:00
include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main'
2022-04-25 15:36:29 +00:00
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main'
2022-04-29 19:59:42 +00:00
include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main'
2022-05-23 12:05:06 +00:00
include { MOTUS_PROFILE } from '../../modules/nf-core/modules/motus/profile/main'
2022-04-10 04:43:30 +00:00
//
// PROFILING: pair every read set with every database, route each pairing to
// the profiler named in the database meta (`tool`), and run the profilers that
// are switched on via `params.run_<tool>`.
//
// take:
//   reads     - channel of [ meta, reads ]
//   databases - channel of [ db_meta, db_path ]; db_meta carries at least
//               `tool` and `db_name` (both are read below)
// emit:
//   profiles  - raw profiler outputs mixed from all enabled tools
//   versions  - versions files of the executed modules
//   mqc       - files collected for MultiQC
//
workflow PROFILING {
    take:
    reads     // [ [ meta ], [ reads ] ]
    databases // [ [ meta ], path ]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()
    ch_raw_profiles  = Channel.empty()

    /*
        COMBINE READS WITH POSSIBLE DATABASES
    */

    // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
    ch_input_for_profiling = reads
        .map {
            meta, reads ->
                // Work on a copy so the meta map of the incoming channel is not mutated
                def meta_new = meta.clone()
                // `def` keeps the suffix local to this closure invocation; without it the
                // variable lives in the script binding and is shared between all
                // (possibly concurrent) invocations, which can mislabel samples
                def pairtype = meta_new['single_end'] ? '_se' : '_pe'
                meta_new['id'] = meta_new['id'] + pairtype
                [meta_new, reads]
        }
        // Every read set is paired with every database: [ meta, reads, db_meta, db ]
        .combine(databases)
        // Route on the tool named in the database meta (element 2 of the tuple)
        .branch {
            malt:       it[2]['tool'] == 'malt'
            kraken2:    it[2]['tool'] == 'kraken2'
            metaphlan3: it[2]['tool'] == 'metaphlan3'
            centrifuge: it[2]['tool'] == 'centrifuge'
            kaiju:      it[2]['tool'] == 'kaiju'
            diamond:    it[2]['tool'] == 'diamond'
            motus:      it[2]['tool'] == 'motus'
            // Catch-all; NOTE(review): this branch is not consumed anywhere in this workflow
            unknown:    true
        }

    /*
        PREPARE PROFILER INPUT CHANNELS & RUN PROFILING
    */

    // Each tool has a slightly different input structure and generally separate
    // input channels for reads vs databases. We restructure the channel tuple
    // for each tool and make liberal use of multiMap to keep reads/databases
    // channel element order in sync with each other

    if ( params.run_malt ) {

        // MALT: We groupTuple to have all samples in one channel for MALT as database
        // loading takes a long time, so we only want to run it once per database
        // TODO document somewhere we only accept illumina short reads for MALT?
        ch_input_for_malt = ch_input_for_profiling.malt
            .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
            .map {
                it ->
                    // Replace the sample meta with a per-database meta whose id is the
                    // database name, so that all samples of one database group together
                    def temp_meta = [ id: it[2]['db_name']] + it[2]
                    def db = it[3]
                    [ temp_meta, it[1], db ]
            }
            .groupTuple(by: [0,2])
            .multiMap {
                it ->
                    reads: [ it[0], it[1].flatten() ]
                    db: it[2]
            }

        MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )

        // One element per RMA file again, so that each file gets its own meta
        ch_maltrun_for_megan = MALT_RUN.out.rma6
            .transpose()
            .map{
                meta, rma ->
                    // re-extract meta from file names, use filename without rma to
                    // ensure we keep paired-end information in downstream filenames
                    // when no pair-merging
                    def meta_new = meta.clone()
                    meta_new['db_name'] = meta.id
                    meta_new['id'] = rma.baseName
                    [ meta_new, rma ]
            }

        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
        ch_versions      = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
        ch_raw_profiles  = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )

    }

    if ( params.run_kraken2 ) {

        // Sample meta and database meta are merged into one map for the reads channel
        ch_input_for_kraken2 = ch_input_for_profiling.kraken2
            .multiMap {
                it ->
                    reads: [ it[0] + it[2], it[1] ]
                    db: it[3]
            }

        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
        ch_versions      = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
        ch_raw_profiles  = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )

    }

    if ( params.run_centrifuge ) {

        ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
            // Drop FASTA inputs, warning once per dropped element
            .filter{
                if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] Centrifuge currently does not accept FASTA files as input. Skipping Centrifuge for sample ${it[0].id}."
                !it[0].is_fasta
            }
            .multiMap {
                it ->
                    reads: [ it[0] + it[2], it[1] ]
                    db: it[3]
            }

        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
        // NOTE(review): the db channel is reused here on the assumption that the results
        // are emitted in the same order as the inputs were submitted - confirm, since a
        // mismatch would pair results with the wrong database
        CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
        // NOTE(review): only the CENTRIFUGE_CENTRIFUGE versions are collected here
        ch_versions     = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
        ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )

    }

    if ( params.run_metaphlan3 ) {

        ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
            // Drop FASTA inputs, warning once per dropped element
            .filter{
                if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}."
                !it[0].is_fasta
            }
            .multiMap {
                it ->
                    reads: [it[0] + it[2], it[1]]
                    db: it[3]
            }

        METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
        ch_versions     = ch_versions.mix( METAPHLAN3.out.versions.first() )
        ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )

    }

    if ( params.run_kaiju ) {

        ch_input_for_kaiju = ch_input_for_profiling.kaiju
            .multiMap {
                it ->
                    reads: [it[0] + it[2], it[1]]
                    db: it[3]
            }

        KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
        // NOTE(review): same ordering assumption between results and db channel as for
        // CENTRIFUGE_KREPORT above - confirm
        KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
        ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) )
        // NOTE(review): only the KAIJU_KAIJU versions are collected here
        ch_versions      = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
        ch_raw_profiles  = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )

    }

    if ( params.run_diamond ) {

        ch_input_for_diamond = ch_input_for_profiling.diamond
            .multiMap {
                it ->
                    reads: [it[0] + it[2], it[1]]
                    db: it[3]
            }

        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format )
        ch_versions     = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
        ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output )

    }

    if ( params.run_motus ) {

        ch_input_for_motus = ch_input_for_profiling.motus
            // Drop FASTA inputs, warning once per dropped element
            .filter{
                if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] mOTUs currently does not accept FASTA files as input. Skipping mOTUs for sample ${it[0].id}."
                !it[0].is_fasta
            }
            .multiMap {
                it ->
                    reads: [it[0] + it[2], it[1]]
                    db: it[3]
            }

        MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db )
        ch_versions     = ch_versions.mix( MOTUS_PROFILE.out.versions.first() )
        ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out )

    }

    emit:
    profiles = ch_raw_profiles  // channel: profiler outputs (presumably [ val(meta), profile ]) - should be text files or biom
    versions = ch_versions      // channel: [ versions.yml ]
    mqc      = ch_multiqc_files // channel: files collected for MultiQC
}