1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 02:52:04 +00:00

Apply review suggestions

This commit is contained in:
Sofia Stamouli 2022-10-20 16:55:57 +02:00
parent 7881ba9aef
commit 511cb16a61
8 changed files with 75 additions and 24 deletions

View file

@ -23,7 +23,8 @@ jobs:
- "21.10.3" - "21.10.3"
- "latest-everything" - "latest-everything"
parameters: parameters:
- "--perform_fastqc_alternative false" - "--preprocessing_qc_tool fastqc"
- "--preprocessing_qc_tool falco"
- "--perform_longread_qc false" - "--perform_longread_qc false"
- "--perform_shortread_qc false" - "--perform_shortread_qc false"
- "--shortread_qc_tool fastp" - "--shortread_qc_tool fastp"

View file

@ -62,7 +62,7 @@
- [FILTLONG](https://github.com/rrwick/Filtlong) - [FILTLONG](https://github.com/rrwick/Filtlong)
- [Falco](https://doi.org/10.12688/f1000research.21142.2) - [falco](https://doi.org/10.12688/f1000research.21142.2)
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874 > de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874

View file

@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
![](docs/images/taxprofiler_tube.png) ![](docs/images/taxprofiler_tube.png)
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`Falco`](https://github.com/smithlabcode/falco) as an alternative option) 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing 2. Performs optional read pre-processing
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop)) - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong)) - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))

View file

@ -59,6 +59,8 @@ params {
// Databases // Databases
databases = null databases = null
preprocessing_qc_tool = 'fastqc'
// FASTQ preprocessing // FASTQ preprocessing
perform_shortread_qc = false perform_shortread_qc = false
shortread_qc_tool = 'fastp' shortread_qc_tool = 'fastp'

View file

@ -10,7 +10,11 @@
"type": "object", "type": "object",
"fa_icon": "fas fa-terminal", "fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.", "description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir", "databases"], "required": [
"input",
"outdir",
"databases"
],
"properties": { "properties": {
"input": { "input": {
"type": "string", "type": "string",
@ -80,7 +84,10 @@
"shortread_qc_tool": { "shortread_qc_tool": {
"type": "string", "type": "string",
"default": "fastp", "default": "fastp",
"enum": ["fastp", "adapterremoval"], "enum": [
"fastp",
"adapterremoval"
],
"fa_icon": "fas fa-tools", "fa_icon": "fas fa-tools",
"description": "Specify which tool to use for short-read QC" "description": "Specify which tool to use for short-read QC"
}, },
@ -133,7 +140,11 @@
"shortread_complexityfilter_tool": { "shortread_complexityfilter_tool": {
"type": "string", "type": "string",
"default": "bbduk", "default": "bbduk",
"enum": ["bbduk", "prinseqplusplus", "fastp"], "enum": [
"bbduk",
"prinseqplusplus",
"fastp"
],
"fa_icon": "fas fa-hammer", "fa_icon": "fas fa-hammer",
"description": "Specify which tool to use for complexity filtering" "description": "Specify which tool to use for complexity filtering"
}, },
@ -167,7 +178,10 @@
"shortread_complexityfilter_prinseqplusplus_mode": { "shortread_complexityfilter_prinseqplusplus_mode": {
"type": "string", "type": "string",
"default": "entropy", "default": "entropy",
"enum": ["entropy", "dust"], "enum": [
"entropy",
"dust"
],
"fa_icon": "fas fa-check-square", "fa_icon": "fas fa-check-square",
"description": "Specify the complexity filter mode for PRINSEQ++" "description": "Specify the complexity filter mode for PRINSEQ++"
}, },
@ -341,7 +355,15 @@
"diamond_output_format": { "diamond_output_format": {
"type": "string", "type": "string",
"default": "tsv", "default": "tsv",
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "enum": [
"blast",
"xml",
"txt",
"daa",
"sam",
"tsv",
"paf"
],
"fa_icon": "fas fa-file", "fa_icon": "fas fa-file",
"description": "Specify output format from DIAMOND profiling.", "description": "Specify output format from DIAMOND profiling.",
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overridden by `--diamond_save_reads`.\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overridden by `--diamond_save_reads`.\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
@ -360,7 +382,14 @@
"kaiju_taxon_rank": { "kaiju_taxon_rank": {
"type": "string", "type": "string",
"default": "species", "default": "species",
"enum": ["phylum", "class", "order", "family", "genus", "species"], "enum": [
"phylum",
"class",
"order",
"family",
"genus",
"species"
],
"fa_icon": "fas fa-tag", "fa_icon": "fas fa-tag",
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
@ -555,7 +584,14 @@
"description": "Method used to save pipeline results to output directory.", "description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy", "fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true "hidden": true
}, },
"email_on_fail": { "email_on_fail": {
@ -707,5 +743,15 @@
{ {
"$ref": "#/definitions/reference_genome_options" "$ref": "#/definitions/reference_genome_options"
} }
] ],
"properties": {
"preprocessing_qc_tool": {
"type": "string",
"default": "fastqc",
"enum": [
"fastqc",
"falco"
]
}
}
} }

View file

@ -54,13 +54,13 @@ workflow LONGREAD_PREPROCESSING {
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
} }
if (params.perform_fastqc_alternative) { if (params.preprocessing_qc_tool == 'fastqc') {
FALCO_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} else {
FASTQC_PROCESSED ( ch_processed_reads ) FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} }
emit: emit:

View file

@ -28,15 +28,14 @@ workflow SHORTREAD_PREPROCESSING {
ch_processed_reads = reads ch_processed_reads = reads
} }
if (params.perform_fastqc_alternative) { if (params.preprocessing_qc_tool == 'fastqc') {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} else {
FASTQC_PROCESSED ( ch_processed_reads ) FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions ) ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} }
emit: emit:

View file

@ -121,7 +121,7 @@ workflow TAXPROFILER {
*/ */
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ) ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
if ( params.perform_fastqc_alternative ) { if ( params.preprocessing_qc_tool == 'falco' ) {
FALCO ( ch_input_for_fastqc ) FALCO ( ch_input_for_fastqc )
ch_versions = ch_versions.mix(FALCO.out.versions.first()) ch_versions = ch_versions.mix(FALCO.out.versions.first())
} else { } else {
@ -257,9 +257,12 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
if (!params.perform_fastqc_alternative) { if (params.preprocessing_qc_tool != 'falco') {
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
} }
else {
ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
}
if (params.perform_shortread_qc) { if (params.perform_shortread_qc) {
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )