1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-24 01:19:56 +00:00
taxprofiler/workflows/taxprofiler.nf

286 lines
12 KiB
Text
Raw Permalink Normal View History

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
VALIDATE INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
// Parameter summary map; reused below for the MultiQC workflow summary and the completion email
def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)

// Validate input parameters
WorkflowTaxprofiler.initialise(params, log)

// TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist.
// Every path that is later opened with file() is listed here, including the
// long-read host-removal index, so that a missing file is reported up front.
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
                            params.shortread_hostremoval_index, params.longread_hostremoval_index,
                            params.multiqc_config
                        ]
// Unset (null/empty) parameters are skipped; only supplied paths are checked
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// Check mandatory parameters
if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
2022-05-07 04:09:05 +00:00
2022-05-31 18:11:19 +00:00
// Cross-parameter consistency checks. Each check either warns about a
// combination with surprising results or aborts on one that cannot run.

// Short-read QC / profiler combinations
if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot exclude unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"

// Long-read QC sub-steps require long-read QC itself to be enabled
if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) && !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_longread_qc"

// fastp complexity filtering is performed within short-read preprocessing, so that step must run with fastp
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'"

// Host removal: both the short- and long-read subworkflows are given ch_reference,
// which is only defined when --hostremoval_reference is supplied
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
if (params.perform_longread_hostremoval  && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_longread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
if (!params.hostremoval_reference && params.shortread_hostremoval_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." }

if (params.hostremoval_reference           ) { ch_reference = file(params.hostremoval_reference) }

// Pre-built indices are optional; an empty list is passed on when none is supplied
if (params.shortread_hostremoval_index     ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
if (params.longread_hostremoval_index      ) { ch_longread_reference_index  = file(params.longread_hostremoval_index  ) } else { ch_longread_reference_index  = [] }

if (params.diamond_save_reads              ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available."
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
// Default MultiQC configuration shipped in the pipeline's assets directory (must exist)
ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)

// Optional user-supplied MultiQC configuration; an empty channel when --multiqc_config is not set
if (params.multiqc_config) {
    ch_multiqc_custom_config = Channel.fromPath(params.multiqc_config)
} else {
    ch_multiqc_custom_config = Channel.empty()
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT LOCAL MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { DB_CHECK } from '../subworkflows/local/db_check'
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
2022-04-07 12:00:10 +00:00
include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval'
2022-05-03 07:34:27 +00:00
include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval'
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
include { PROFILING } from '../subworkflows/local/profiling'
include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT NF-CORE MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
//
// MODULE: Installed directly from nf-core/modules
//
include { FASTQC } from '../modules/nf-core/modules/fastqc/main'
include { MULTIQC } from '../modules/nf-core/modules/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
// Info required for completion email and summary
def multiqc_report = []

//
// WORKFLOW: validate inputs, run FastQC, optionally preprocess / complexity-filter /
// host-deplete / run-merge the reads, profile them against the databases, and
// collect everything into a MultiQC report.
//
workflow TAXPROFILER {

    ch_versions         = Channel.empty()
    ch_taxprofiler_logo = Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png")

    // The same condition gates both the complexity-filtering step and its MultiQC input.
    // fastp complexity filtering is activated via modules.conf in shortread_preprocessing,
    // so the dedicated subworkflow is skipped when the tool is fastp.
    def run_shortread_complexityfilter = params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool != 'fastp'

    /*
        SUBWORKFLOW: Read in samplesheet, validate and stage input files
    */
    INPUT_CHECK ( ch_input )
    ch_versions = ch_versions.mix( INPUT_CHECK.out.versions )

    DB_CHECK ( ch_databases )
    ch_versions = ch_versions.mix( DB_CHECK.out.versions )

    /*
        MODULE: Run FastQC
    */
    ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
    FASTQC ( ch_input_for_fastqc )
    ch_versions = ch_versions.mix( FASTQC.out.versions.first() )

    /*
        SUBWORKFLOW: PERFORM PREPROCESSING
    */
    if ( params.perform_shortread_qc ) {
        SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq )
        ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING.out.reads
        ch_versions                = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
    } else {
        ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
    }

    if ( params.perform_longread_qc ) {
        LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore )
        // Wrap the second tuple element in a list
        ch_longreads_preprocessed = LONGREAD_PREPROCESSING.out.reads
            .map { entry -> [ entry[0], [ entry[1] ] ] }
        ch_versions               = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
    } else {
        ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
    }

    /*
        SUBWORKFLOW: COMPLEXITY FILTERING
    */
    if ( run_shortread_complexityfilter ) {
        SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed )
        ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING.out.reads
        ch_versions            = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
    } else {
        ch_shortreads_filtered = ch_shortreads_preprocessed
    }

    /*
        SUBWORKFLOW: HOST REMOVAL
    */
    if ( params.perform_shortread_hostremoval ) {
        SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_shortread_reference_index )
        ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL.out.reads
        ch_versions               = ch_versions.mix( SHORTREAD_HOSTREMOVAL.out.versions )
    } else {
        ch_shortreads_hostremoved = ch_shortreads_filtered
    }

    if ( params.perform_longread_hostremoval ) {
        LONGREAD_HOSTREMOVAL ( ch_longreads_preprocessed, ch_reference, ch_longread_reference_index )
        ch_longreads_hostremoved = LONGREAD_HOSTREMOVAL.out.reads
        ch_versions              = ch_versions.mix( LONGREAD_HOSTREMOVAL.out.versions )
    } else {
        ch_longreads_hostremoved = ch_longreads_preprocessed
    }

    /*
        MODULE: RUN MERGING
    */
    if ( params.perform_runmerging ) {

        ch_reads_for_cat_branch = ch_shortreads_hostremoved
            .mix( ch_longreads_hostremoved )
            .map { meta, reads ->
                // Drop the run accession from a copy of the meta map so that
                // entries differing only in run_accession group together
                def meta_without_run = meta.clone()
                meta_without_run.remove('run_accession')
                [ meta_without_run, reads ]
            }
            .groupTuple()
            .map { meta, reads -> [ meta, reads.flatten() ] }
            .branch { meta, reads ->
                // we can't concatenate files if there is not a second run, we branch
                // here to separate them out, and mix back in after for efficiency:
                // more than 1 file for single-end, more than 2 for paired-end
                cat: reads.size() > ( meta.single_end ? 1 : 2 )
                skip: true
            }

        CAT_FASTQ ( ch_reads_for_cat_branch.cat )
        ch_reads_runmerged = CAT_FASTQ.out.reads
            .mix( ch_reads_for_cat_branch.skip )
            // Ensure the reads element is always a flat list
            .map { meta, reads -> [ meta, [ reads ].flatten() ] }
            .mix( INPUT_CHECK.out.fasta )

        ch_versions = ch_versions.mix( CAT_FASTQ.out.versions )

    } else {
        ch_reads_runmerged = ch_shortreads_hostremoved
            .mix( ch_longreads_hostremoved, INPUT_CHECK.out.fasta )
    }

    /*
        SUBWORKFLOW: PROFILING
    */
    PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs )
    ch_versions = ch_versions.mix( PROFILING.out.versions )

    /*
        SUBWORKFLOW: VISUALIZATION_KRONA
    */
    if ( params.run_krona ) {
        VISUALIZATION_KRONA ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs )
        ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
    }

    /*
        MODULE: MultiQC
    */
    CUSTOM_DUMPSOFTWAREVERSIONS (
        ch_versions.unique().collectFile(name: 'collated_versions.yml')
    )

    workflow_summary    = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
    ch_workflow_summary = Channel.value(workflow_summary)

    // Inputs that are always handed to MultiQC
    ch_multiqc_files = Channel.empty()
        .mix( Channel.from(ch_multiqc_config) )
        .mix( ch_multiqc_custom_config.collect().ifEmpty([]) )
        .mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') )
        .mix( CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() )
        .mix( FASTQC.out.zip.collect{it[1]}.ifEmpty([]) )
        .mix( ch_taxprofiler_logo.ifEmpty([]) )

    // Inputs from optional steps; each `out` is only referenced when its step was invoked above
    if ( params.perform_shortread_qc ) {
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

    if ( params.perform_longread_qc ) {
        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

    if ( run_shortread_complexityfilter ) {
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

    if ( params.perform_shortread_hostremoval ) {
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]) )
    }
    // NOTE(review): nothing from LONGREAD_HOSTREMOVAL is passed to MultiQC here —
    // confirm whether that subworkflow emits an `mqc` output that should be added.

    ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc )

    // TODO create multiQC module for metaphlan
    MULTIQC ( ch_multiqc_files.collect() )

    // Stored in the script-level variable so the completion handler can pass it to the email
    multiqc_report = MULTIQC.out.report.toList()
    ch_versions    = ch_versions.mix( MULTIQC.out.versions )
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
COMPLETION EMAIL AND SUMMARY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
workflow.onComplete {
    // Send the completion email only when params.email or params.email_on_fail is set
    def email_requested = params.email || params.email_on_fail
    if (email_requested) {
        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
    }

    // The run summary is always produced
    NfcoreTemplate.summary(workflow, params, log)
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/