1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 18:19:54 +00:00
taxprofiler/workflows/taxprofiler.nf
James A. Fellows Yates 1f31f88635
Merge pull request #158 from nf-core/adapterremoval-fix
Fix adapterremoval adapter conflict and add optional adapter list file
2022-11-03 11:36:14 +01:00

333 lines
15 KiB
Text

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
VALIDATE INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
// Summary of the parameters in use; consumed later by the MultiQC workflow
// summary and by the completion e-mail / notification hooks.
def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)

// Validate input parameters
WorkflowTaxprofiler.initialise(params, log)

// TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
                           params.shortread_hostremoval_index, params.multiqc_config,
                           params.shortread_qc_adapterlist, params.longread_hostremoval_index
                         ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// Check mandatory parameters
if ( params.input ) {
    ch_input = file(params.input, checkIfExists: true)
    // For a YAML input (PEP config) the parent directory of the file is passed along as well;
    // for any other extension an empty list is used as placeholder.
    pep_input_base_dir = file(params.input).extension.matches("yaml|yml") ? file(file(params.input).getParent(), checkIfExists: true) : []
} else {
    exit 1, "Input samplesheet, or PEP config and base directory not specified"
}

if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }

// Short-read QC / complexity filtering parameter combinations
if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot exclude unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'"

// Host removal: both the short-read and the long-read subworkflow receive ch_reference,
// which is only assigned when --hostremoval_reference is given, so fail early for either.
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
if (params.perform_longread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_longread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
// An index without its reference FASTA is a configuration error
if (!params.hostremoval_reference && params.shortread_hostremoval_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." }

if (params.hostremoval_reference           ) { ch_reference = file(params.hostremoval_reference) }
// Optional pre-built indices; an empty list stands in when none is supplied
if (params.shortread_hostremoval_index     ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
if (params.longread_hostremoval_index      ) { ch_longread_reference_index  = file(params.longread_hostremoval_index  ) } else { ch_longread_reference_index  = [] }

// Profiler-specific warnings and sanity checks
if (params.diamond_save_reads              ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available."
if (params.run_malt && params.run_krona && !params.krona_taxonomy_directory) log.warn "[nf-core/taxprofiler] Krona can only be run on MALT output if path to Krona taxonomy database supplied to --krona_taxonomy_directory. Krona will not be executed in this run for MALT."
if (params.run_bracken && !params.run_kraken2) exit 1, 'ERROR: [nf-core/taxprofiler] You are attempting to run Bracken without running kraken2. This is not possible! Please set --run_kraken2 as well.'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
// Pipeline-bundled MultiQC configuration; must exist in the project assets
ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)

// Optional user-supplied MultiQC configuration (empty channel when --multiqc_config is unset)
if (params.multiqc_config) {
    ch_multiqc_custom_config = Channel.fromPath( params.multiqc_config, checkIfExists: true )
} else {
    ch_multiqc_custom_config = Channel.empty()
}

// Optional user-supplied MultiQC logo (empty channel when --multiqc_logo is unset)
if (params.multiqc_logo) {
    ch_multiqc_logo = Channel.fromPath( params.multiqc_logo, checkIfExists: true )
} else {
    ch_multiqc_logo = Channel.empty()
}

// Methods-description template: the user-supplied file if given, otherwise the bundled one.
// Either way the chosen file must exist.
def methods_description_path = params.multiqc_methods_description ?: "$projectDir/assets/methods_description_template.yml"
ch_multiqc_custom_methods_description = file(methods_description_path, checkIfExists: true)
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT LOCAL MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { DB_CHECK } from '../subworkflows/local/db_check'
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval'
include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval'
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
include { PROFILING } from '../subworkflows/local/profiling'
include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona'
include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardisation_profiles'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT NF-CORE MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
//
// MODULE: Installed directly from nf-core/modules
//
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { FALCO } from '../modules/nf-core/falco/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
// Info required for completion email and summary
def multiqc_report = []

//
// WORKFLOW: main taxprofiler workflow.
//
// Stages, in order: input/database sheet checks, raw-read QC (FastQC or falco),
// optional short/long-read preprocessing, optional complexity filtering, optional
// host removal, optional per-sample run merging, profiling, optional Krona
// visualisation, optional profile standardisation, and finally software-version
// collation plus the MultiQC report.
//
workflow TAXPROFILER {

    // Accumulates the versions output of every executed module/subworkflow
    ch_versions = Channel.empty()

    // Honour a user-supplied --multiqc_logo; only fall back to the bundled
    // pipeline logo when none was given.
    ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png")

    // Optional adapter list; the required file extension depends on the selected short-read QC tool
    adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : []

    if ( params.shortread_qc_adapterlist ) {
        if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
        if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
    }

    /*
        SUBWORKFLOW: Read in samplesheet, validate and stage input files
    */
    INPUT_CHECK (
        ch_input, pep_input_base_dir
    )
    ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)

    DB_CHECK (
        ch_databases
    )
    ch_versions = ch_versions.mix(DB_CHECK.out.versions)

    /*
        MODULE: Run FastQC (or falco, when selected via --preprocessing_qc_tool)
    */
    ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )

    if ( params.preprocessing_qc_tool == 'falco' ) {
        FALCO ( ch_input_for_fastqc )
        ch_versions = ch_versions.mix(FALCO.out.versions.first())
    } else {
        FASTQC ( ch_input_for_fastqc )
        ch_versions = ch_versions.mix(FASTQC.out.versions.first())
    }

    /*
        SUBWORKFLOW: PERFORM PREPROCESSING
    */
    if ( params.perform_shortread_qc ) {
        ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads
        ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
    } else {
        ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
    }

    if ( params.perform_longread_qc ) {
        // Wrap the second tuple element in a list
        ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
                                        .map { it -> [ it[0], [it[1]] ] }
        ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
    } else {
        ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
    }

    /*
        SUBWORKFLOW: COMPLEXITY FILTERING
    */

    // fastp complexity filtering is activated via modules.conf in shortread_preprocessing
    if ( params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool != 'fastp' ) {
        ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
        ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
    } else {
        ch_shortreads_filtered = ch_shortreads_preprocessed
    }

    /*
        SUBWORKFLOW: HOST REMOVAL
    */
    if ( params.perform_shortread_hostremoval ) {
        ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_shortread_reference_index ).reads
        ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions)
    } else {
        ch_shortreads_hostremoved = ch_shortreads_filtered
    }

    if ( params.perform_longread_hostremoval ) {
        ch_longreads_hostremoved = LONGREAD_HOSTREMOVAL ( ch_longreads_preprocessed, ch_reference, ch_longread_reference_index ).reads
        ch_versions = ch_versions.mix(LONGREAD_HOSTREMOVAL.out.versions)
    } else {
        ch_longreads_hostremoved = ch_longreads_preprocessed
    }

    /*
        MODULE: RUN MERGING
    */
    if ( params.perform_runmerging ) {

        ch_reads_for_cat_branch = ch_shortreads_hostremoved
            .mix( ch_longreads_hostremoved )
            .map {
                meta, reads ->
                    // Drop 'run_accession' from a copy of the meta map so that entries
                    // differing only in that key share the same grouping key below
                    def meta_new = meta.clone()
                    meta_new.remove('run_accession')
                    [ meta_new, reads ]
            }
            .groupTuple()
            .map {
                meta, reads ->
                    [ meta, reads.flatten() ]
            }
            .branch {
                meta, reads ->
                    // we can't concatenate files if there is not a second run, we branch
                    // here to separate them out, and mix back in after for efficiency
                    cat: ( meta.single_end && reads.size() > 1 ) || ( !meta.single_end && reads.size() > 2 )
                    skip: true
            }

        ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads
            .mix( ch_reads_for_cat_branch.skip )
            .map {
                meta, reads ->
                    // Normalise reads to a flat list, whether a single item or a list was emitted
                    [ meta, [ reads ].flatten() ]
            }
            .mix( INPUT_CHECK.out.fasta )

        ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)

    } else {
        ch_reads_runmerged = ch_shortreads_hostremoved
            .mix( ch_longreads_hostremoved, INPUT_CHECK.out.fasta )
    }

    /*
        SUBWORKFLOW: PROFILING
    */
    PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs )
    ch_versions = ch_versions.mix( PROFILING.out.versions )

    /*
        SUBWORKFLOW: VISUALIZATION_KRONA
    */
    if ( params.run_krona ) {
        VISUALIZATION_KRONA ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs )
        ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
    }

    /*
        SUBWORKFLOW: PROFILING STANDARDISATION
    */
    if ( params.run_profile_standardisation ) {
        STANDARDISATION_PROFILES ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs, PROFILING.out.motus_version )
        ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions )
    }

    /*
        MODULE: MultiQC
    */
    CUSTOM_DUMPSOFTWAREVERSIONS (
        ch_versions.unique().collectFile(name: 'collated_versions.yml')
    )

    workflow_summary    = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
    ch_workflow_summary = Channel.value(workflow_summary)

    methods_description    = WorkflowTaxprofiler.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
    ch_methods_description = Channel.value(methods_description)

    // Gather every file MultiQC should see; stage outputs are only mixed in
    // under the same conditions under which the stage was executed above.
    ch_multiqc_files = Channel.empty()
    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())

    if ( params.preprocessing_qc_tool == 'falco' ) {
        ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
    } else {
        ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
    }

    if (params.perform_shortread_qc) {
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

    if (params.perform_longread_qc) {
        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

    if (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool != 'fastp'){
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

    if (params.perform_shortread_hostremoval) {
        ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
    }

    if (params.perform_longread_hostremoval) {
        ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
    }

    ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) )

    if ( params.run_profile_standardisation ) {
        ch_multiqc_files = ch_multiqc_files.mix( STANDARDISATION_PROFILES.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

    // TODO create multiQC module for metaphlan
    MULTIQC (
        ch_multiqc_files.collect(),
        ch_multiqc_config.collect().ifEmpty([]),
        ch_multiqc_custom_config.collect().ifEmpty([]),
        ch_multiqc_logo.collect().ifEmpty([])
    )
    // Stored in the script-level variable so the completion handler can pass the report on
    multiqc_report = MULTIQC.out.report.toList()
    ch_versions    = ch_versions.mix(MULTIQC.out.versions)
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
COMPLETION EMAIL AND SUMMARY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
// Completion handler: optionally sends the e-mail report, always prints the run
// summary, and optionally posts a notification to the configured hook URL.
workflow.onComplete {
    def email_requested = params.email || params.email_on_fail
    def hook_requested  = params.hook_url

    // E-mail is sent when either --email or --email_on_fail is set
    if (email_requested) {
        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
    }

    // The summary is produced unconditionally
    NfcoreTemplate.summary(workflow, params, log)

    // Notification card is only posted when --hook_url is set
    if (hook_requested) {
        NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log)
    }
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/