From 511cb16a6119fe746950abc5fed4100517a41775 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Thu, 20 Oct 2022 16:55:57 +0200 Subject: [PATCH] Apply review suggestions --- .github/workflows/ci.yml | 3 +- CITATIONS.md | 2 +- README.md | 2 +- nextflow.config | 2 + nextflow_schema.json | 62 ++++++++++++++++--- subworkflows/local/longread_preprocessing.nf | 10 +-- subworkflows/local/shortread_preprocessing.nf | 11 ++-- workflows/taxprofiler.nf | 7 ++- 8 files changed, 75 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 344505a..12185fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,8 @@ jobs: - "21.10.3" - "latest-everything" parameters: - - "--perform_fastqc_alternative false" + - "--preprocessing_qc_tool fastqc" + - "--preprocessing_qc_tool falco" - "--perform_longread_qc false" - "--perform_shortread_qc false" - "--shortread_qc_tool fastp" diff --git a/CITATIONS.md b/CITATIONS.md index 510c74b..599e049 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -62,7 +62,7 @@ - [FILTLONG](https://github.com/rrwick/Filtlong) -- [Falco](https://doi.org/10.12688/f1000research.21142.2) +- [falco](https://doi.org/10.12688/f1000research.21142.2) > de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874 diff --git a/README.md b/README.md index e17c7b5..3f0a840 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si ![](docs/images/taxprofiler_tube.png) -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`Falco`](https://github.com/smithlabcode/falco) as an alternative option) +1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option) 2. 
Performs optional read pre-processing - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop)) - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong)) diff --git a/nextflow.config b/nextflow.config index efb5aff..b69948e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,6 +59,8 @@ params { // Databases databases = null + preprocessing_qc_tool = 'fastqc' + // FASTQ preprocessing perform_shortread_qc = false shortread_qc_tool = 'fastp' diff --git a/nextflow_schema.json b/nextflow_schema.json index f88443f..055c368 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir", "databases"], + "required": [ + "input", + "outdir", + "databases" + ], "properties": { "input": { "type": "string", @@ -80,7 +84,10 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"], + "enum": [ + "fastp", + "adapterremoval" + ], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -133,7 +140,11 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": ["bbduk", "prinseqplusplus", "fastp"], + "enum": [ + "bbduk", + "prinseqplusplus", + "fastp" + ], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -167,7 +178,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"], + "enum": [ + "entropy", + "dust" + ], "fa_icon": 
"fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -341,7 +355,15 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -360,7 +382,14 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"], + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -555,7 +584,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -707,5 +743,15 @@ { "$ref": "#/definitions/reference_genome_options" } - ] + ], + "properties": { + "preprocessing_qc_tool": { + "type": "string", + "default": "fastqc", + "enum": [ + "fastqc", + "falco" + ] + } + } } diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index cf0d168..d16db3f 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -54,13 +54,13 @@ workflow LONGREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) } - if (params.perform_fastqc_alternative) { - FALCO_PROCESSED ( ch_processed_reads ) - ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt ) - - } else { + if (params.preprocessing_qc_tool == 'fastqc') { FASTQC_PROCESSED ( ch_processed_reads ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) + + } else if (params.preprocessing_qc_tool == 'falco') { + FALCO_PROCESSED ( ch_processed_reads ) + ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt ) } emit: diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index a67539a..f2cd738 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -28,15 +28,14 @@ workflow SHORTREAD_PREPROCESSING { ch_processed_reads = reads } - if (params.perform_fastqc_alternative) { - FALCO_PROCESSED ( ch_processed_reads ) - ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt ) - - } else { + if (params.preprocessing_qc_tool == 'fastqc') 
{ FASTQC_PROCESSED ( ch_processed_reads ) ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) + } else if (params.preprocessing_qc_tool == 'falco') { + FALCO_PROCESSED ( ch_processed_reads ) + ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt ) } emit: diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 2435782..905cbd9 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -121,7 +121,7 @@ workflow TAXPROFILER { */ ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ) - if ( params.perform_fastqc_alternative ) { + if ( params.preprocessing_qc_tool == 'falco' ) { FALCO ( ch_input_for_fastqc ) ch_versions = ch_versions.mix(FALCO.out.versions.first()) } else { @@ -257,9 +257,12 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - if (!params.perform_fastqc_alternative) { + if (params.preprocessing_qc_tool != 'falco') { ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) } + else { + ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([])) + } if (params.perform_shortread_qc) { ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )