From 6895e457682d27107b33829fe7f2115ee5b10355 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 23 Mar 2023 15:46:02 +0100 Subject: [PATCH] Fix a few conditional bugs and related --- conf/modules.config | 80 ++++++++++++++++++++++++++-------------- docs/output.md | 19 +++++----- nextflow.config | 2 +- workflows/taxprofiler.nf | 9 +++-- 4 files changed, 70 insertions(+), 40 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5662cb4..0e7e2ea 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,10 +73,10 @@ process { pattern: '*.{log,html,json}' ], [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && !params.perform_shortread_qc && params.save_analysis_ready_reads + enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && params.perform_shortread_qc && params.save_analysis_ready_fastqs ] ] } @@ -107,10 +107,10 @@ process { pattern: '*.{log,html,json}' ], [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && !params.perform_shortread_qc && params.save_analysis_ready_reads + enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && params.perform_shortread_qc && params.save_analysis_ready_fastqs ] ] } @@ -136,10 +136,10 @@ process { pattern: '*.settings' ], [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: 
!params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && !params.perform_shortread_qc && params.save_analysis_ready_reads + enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && params.perform_shortread_qc && params.save_analysis_ready_fastqs ] ] } @@ -166,12 +166,19 @@ process { path: { "${params.outdir}/adapterremoval" }, mode: params.publish_dir_mode, pattern: '*.settings' - ], + ] + ] + } + + // AdapterRemoval separate output merging + withName: CAT_FASTQ { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && !params.perform_shortread_qc && params.save_analysis_ready_reads + enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && params.perform_shortread_qc && params.save_analysis_ready_fastqs ] ] } @@ -189,8 +196,13 @@ process { path: { "${params.outdir}/porechop" }, mode: params.publish_dir_mode, pattern: '*.log' + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*_porechopped.fastq.gz', + enabled: !params.perform_runmerging && !params.perform_longread_hostremoval && params.longread_qc_skipqualityfilter && !params.longread_qc_skipadaptertrim && params.perform_longread_qc && params.save_analysis_ready_fastqs ] - // TODO ANALYSIS READY ] } @@ -213,9 +225,14 @@ process { path: { "${params.outdir}/filtlong" }, mode: params.publish_dir_mode, pattern: '*.log' + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: !params.perform_runmerging && 
!params.perform_longread_hostremoval && !params.longread_qc_skipqualityfilter && params.perform_longread_qc && params.save_analysis_ready_fastqs ] ] - // TODO ANALYSIS READY } withName: BBMAP_BBDUK { @@ -238,10 +255,10 @@ process { pattern: '*.log' ], [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && params.shortread_complexityfilter_tool && params.save_analysis_ready_reads + enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && params.shortread_complexityfilter_tool && params.save_analysis_ready_fastqs ] ] } @@ -265,10 +282,10 @@ process { pattern: '*.log' ], [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && params.shortread_complexityfilter_tool && params.save_analysis_ready_reads + enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && params.shortread_complexityfilter_tool && params.save_analysis_ready_fastqs ] ] } @@ -282,10 +299,10 @@ process { enabled: params.save_hostremoval_index ], [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && !params.perform_shortread_qc && params.save_analysis_ready_reads + enabled: !params.perform_runmerging && !params.perform_shortread_hostremoval && !params.shortread_complexityfilter_tool && !params.perform_shortread_qc && params.save_analysis_ready_fastqs ] ] } @@ -312,10 +329,11 @@ process { enabled: params.save_hostremoval_unmapped ], [ - path: { 
"${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, + enabled: params.perform_shortread_hostremoval, pattern: '*.fastq.gz', - saveAs: { (( !params.perform_runmerging | params.perform_runmerging && !${meta.is_multirun} ) && params.perform_shortread_hostremoval && params.save_analysis_ready_reads ) ? it : null } + saveAs: { ( params.perform_runmerging == false || params.perform_runmerging && !meta.is_multirun ) && params.perform_shortread_hostremoval && params.save_analysis_ready_fastqs ? it : null } ] ] } @@ -338,7 +356,6 @@ process { pattern: '*.bam', enabled: params.save_hostremoval_bam ] - // TODO ANALYSIS READY READS } withName: SAMTOOLS_VIEW { @@ -349,10 +366,19 @@ process { withName: SAMTOOLS_BAM2FQ { ext.prefix = { "${meta.id}_${meta.run_accession}.unmapped" } publishDir = [ - path: { "${params.outdir}/samtools/bam2fq" }, - mode: params.publish_dir_mode, - pattern: '*.fq.gz', - enabled: params.save_hostremoval_unmapped + [ + path: { "${params.outdir}/samtools/bam2fq" }, + mode: params.publish_dir_mode, + pattern: '*.fq.gz', + enabled: params.save_hostremoval_unmapped + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*.fq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { ( !params.perform_runmerging || ( params.perform_runmerging && !meta.is_multirun ) ) && params.perform_longread_hostremoval && params.save_analysis_ready_fastqs ? 
it : null } + ] ] } @@ -365,7 +391,7 @@ process { ] } - withName: CAT_FASTQ { + withName: MERGE_RUNS { ext.prefix = { "${meta.id}" } publishDir = [ [ @@ -375,10 +401,10 @@ process { enabled: params.save_runmerged_reads ], [ - path: { "${params.outdir}/analysis_ready_reads" }, + path: { "${params.outdir}/analysis_ready_fastqs" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.perform_runmerging && params.save_analysis_ready_reads + enabled: params.perform_runmerging && params.save_analysis_ready_fastqs ] ] } diff --git a/docs/output.md b/docs/output.md index 4ef6770..3936344 100644 --- a/docs/output.md +++ b/docs/output.md @@ -102,7 +102,7 @@ You can change the default value for low complexity filtering by using the argum By default nf-core/taxprofiler will only provide the `.settings` file if AdapterRemoval is selected. -You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. If this is selected, you may receive different combinations of `.fastq` files for each sample depending on the input types - e.g. whether you have merged or not, or if you're supplying both single- and paired-end reads. +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. If this is selected, you may receive different combinations of `.fastq` files for each sample depending on the input types - e.g. whether you have merged or not, or if you're supplying both single- and paired-end reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. > ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as complexity filtering, host removal, run merging etc.. 
@@ -121,7 +121,7 @@ You will only find the `.fastq` files in the results directory if you provide ` The output logs are saved in the output folder and are part of MultiQC report.You do not normally need to check these manually. -You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. > ⚠️ We do **not** recommend using Porechop if you are already trimming the adapters with ONT's basecaller Guppy. @@ -140,7 +140,7 @@ It is used in nf-core/taxprofiler for complexity filtering using different algor -By default nf-core/taxprofiler will only provide the `.log` file if BBDuk is selected as the complexity filtering tool. You will only find the complexity filtered reads in your results directory if you provide ` --save_complexityfiltered_reads` . +By default nf-core/taxprofiler will only provide the `.log` file if BBDuk is selected as the complexity filtering tool. You will only find the complexity filtered reads in your results directory if you provide `--save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. > ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as host removal, run merging etc.. 
@@ -159,7 +159,7 @@ It is used in nf-core/taxprofiler for complexity filtering using different algor -By default nf-core/taxprofiler will only provide the `.log` file if PRINSEQ++ is selected as the complexity filtering tool. You will only find the complexity filtered `.fastq` files in your results directory if you supply ` --save_complexityfiltered_reads` . +By default nf-core/taxprofiler will only provide the `.log` file if PRINSEQ++ is selected as the complexity filtering tool. You will only find the complexity filtered `.fastq` files in your results directory if you supply `--save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. > ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as host removal, run merging etc.. @@ -176,7 +176,7 @@ By default nf-core/taxprofiler will only provide the `.log` file if PRINSEQ++ is -You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. > ⚠️ We do **not** recommend using Filtlong if you are performing filtering of low quality reads with ONT's basecaller Guppy. @@ -199,7 +199,7 @@ It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) and/ -By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. 
You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify to save `--save_hostremoval_unmapped` - these contain only unmapped reads. +By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify to save `--save_hostremoval_unmapped` - these contain only unmapped reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. > ℹ️ Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/bam2fq/`](#samtools-bam2fq) @@ -242,7 +242,7 @@ By default, nf-core/taxprofiler will only provide the `.bam` file containing map -This directory will be present and contain the unmapped reads from the `.fastq` format from long-read minimap2 host removal, if `--save_hostremoval_unmapped` is supplied +This directory will be present and contain the unmapped reads in `.fastq` format from long-read minimap2 host removal, if `--save_hostremoval_unmapped` is supplied. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. > ℹ️ For short-read unmapped reads, see [bowtie2](#bowtie2). 
@@ -264,7 +264,7 @@ In most cases you do not need to check this file, as it is rendered in the Multi nf-core/taxprofiler offers the option to merge FASTQ files of multiple sequencing runs or libraries that derive from the same sample, as specified in the input samplesheet. -This is the last preprocessing step, so if you have multiple runs or libraries (and run merging turned on), this will represent the final reads that will go into classification/profiling steps. +This is the last possible preprocessing step, so if you have multiple runs or libraries (and run merging turned on), this will represent the final reads that will go into classification/profiling steps.
Output files @@ -274,9 +274,10 @@ This is the last preprocessing step, so if you have multiple runs or libraries (
+ Note that you will only find samples that went through the run merging step in this directory. For samples that had a single run or library will not go through this step of the pipeline and thus will not be present in this directory. -⚠️ You must make sure to turn on the saving of the reads from the previous preprocessing step you may have turned on, if you have single-run or library reads in your pipeline run, and wish to save the final reads that go into classification/profiling! +This directory and its FASTQ files will only be present if you supply `--save_runmerged_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. ### Bracken diff --git a/nextflow.config b/nextflow.config index 0e6850f..0590334 100644 --- a/nextflow.config +++ b/nextflow.config @@ -106,7 +106,7 @@ params { save_hostremoval_unmapped = false // Publishing final reads going into profiling - save_analysis_ready_reads = false + save_analysis_ready_fastqs = false // MALT run_malt = false diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index e44015f..54a2bad 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -91,7 +91,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' include { FALCO } from '../modules/nf-core/falco/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { CAT_FASTQ as MERGE_RUNS } from '../modules/nf-core/cat/fastq/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -121,6 +121,9 @@ workflow TAXPROFILER { ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // 
Save final FASTA reads if requested, as otherwise no processing occurs on FASTA + + DB_CHECK ( ch_databases ) @@ -210,7 +213,7 @@ workflow TAXPROFILER { skip: true } - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads + ch_reads_runmerged = MERGE_RUNS ( ch_reads_for_cat_branch.cat ).reads .mix( ch_reads_for_cat_branch.skip ) .map { meta, reads -> @@ -218,7 +221,7 @@ workflow TAXPROFILER { } .mix( INPUT_CHECK.out.fasta ) - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + ch_versions = ch_versions.mix(MERGE_RUNS.out.versions) } else { ch_reads_runmerged = ch_shortreads_hostremoved