1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 20:53:08 +00:00

Apply review suggestions

This commit is contained in:
Sofia Stamouli 2022-10-20 16:55:57 +02:00
parent 7881ba9aef
commit 511cb16a61
8 changed files with 75 additions and 24 deletions

View file

@ -23,7 +23,8 @@ jobs:
- "21.10.3"
- "latest-everything"
parameters:
- "--perform_fastqc_alternative false"
- "--preprocessing_qc_tool fastqc"
- "--preprocessing_qc_tool falco"
- "--perform_longread_qc false"
- "--perform_shortread_qc false"
- "--shortread_qc_tool fastp"

View file

@ -62,7 +62,7 @@
- [FILTLONG](https://github.com/rrwick/Filtlong)
- [Falco](https://doi.org/10.12688/f1000research.21142.2)
- [falco](https://doi.org/10.12688/f1000research.21142.2)
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874

View file

@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
![](docs/images/taxprofiler_tube.png)
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`Falco`](https://github.com/smithlabcode/falco) as an alternative option)
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))

View file

@ -59,6 +59,8 @@ params {
// Databases
databases = null
preprocessing_qc_tool = 'fastqc'
// FASTQ preprocessing
perform_shortread_qc = false
shortread_qc_tool = 'fastp'

View file

@ -10,7 +10,11 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir", "databases"],
"required": [
"input",
"outdir",
"databases"
],
"properties": {
"input": {
"type": "string",
@ -80,7 +84,10 @@
"shortread_qc_tool": {
"type": "string",
"default": "fastp",
"enum": ["fastp", "adapterremoval"],
"enum": [
"fastp",
"adapterremoval"
],
"fa_icon": "fas fa-tools",
"description": "Specify which tool to use for short-read QC"
},
@ -133,7 +140,11 @@
"shortread_complexityfilter_tool": {
"type": "string",
"default": "bbduk",
"enum": ["bbduk", "prinseqplusplus", "fastp"],
"enum": [
"bbduk",
"prinseqplusplus",
"fastp"
],
"fa_icon": "fas fa-hammer",
"description": "Specify which tool to use for complexity filtering"
},
@ -167,7 +178,10 @@
"shortread_complexityfilter_prinseqplusplus_mode": {
"type": "string",
"default": "entropy",
"enum": ["entropy", "dust"],
"enum": [
"entropy",
"dust"
],
"fa_icon": "fas fa-check-square",
"description": "Specify the complexity filter mode for PRINSEQ++"
},
@ -341,7 +355,15 @@
"diamond_output_format": {
"type": "string",
"default": "tsv",
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"],
"enum": [
"blast",
"xml",
"txt",
"daa",
"sam",
"tsv",
"paf"
],
"fa_icon": "fas fa-file",
"description": "Specify output format from DIAMOND profiling.",
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
@ -360,7 +382,14 @@
"kaiju_taxon_rank": {
"type": "string",
"default": "species",
"enum": ["phylum", "class", "order", "family", "genus", "species"],
"enum": [
"phylum",
"class",
"order",
"family",
"genus",
"species"
],
"fa_icon": "fas fa-tag",
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
@ -555,7 +584,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true
},
"email_on_fail": {
@ -707,5 +743,15 @@
{
"$ref": "#/definitions/reference_genome_options"
}
]
],
"properties": {
"preprocessing_qc_tool": {
"type": "string",
"default": "fastqc",
"enum": [
"fastqc",
"falco"
]
}
}
}

View file

@ -54,13 +54,13 @@ workflow LONGREAD_PREPROCESSING {
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
}
if (params.perform_fastqc_alternative) {
FALCO_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} else {
if (params.preprocessing_qc_tool == 'fastqc') {
FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
}
emit:

View file

@ -28,15 +28,14 @@ workflow SHORTREAD_PREPROCESSING {
ch_processed_reads = reads
}
if (params.perform_fastqc_alternative) {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} else {
if (params.preprocessing_qc_tool == 'fastqc') {
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
}
emit:

View file

@ -121,7 +121,7 @@ workflow TAXPROFILER {
*/
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
if ( params.perform_fastqc_alternative ) {
if ( params.preprocessing_qc_tool == 'falco' ) {
FALCO ( ch_input_for_fastqc )
ch_versions = ch_versions.mix(FALCO.out.versions.first())
} else {
@ -257,9 +257,12 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
if (!params.perform_fastqc_alternative) {
if (!params.preprocessing_qc_tool == 'falco') {
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
}
else {
ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
}
if (params.perform_shortread_qc) {
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )