mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-25 17:09:54 +00:00
Apply review suggestions
This commit is contained in:
parent
7881ba9aef
commit
511cb16a61
8 changed files with 75 additions and 24 deletions
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
|
@ -23,7 +23,8 @@ jobs:
|
||||||
- "21.10.3"
|
- "21.10.3"
|
||||||
- "latest-everything"
|
- "latest-everything"
|
||||||
parameters:
|
parameters:
|
||||||
- "--perform_fastqc_alternative false"
|
- "--preprocessing_qc_tool fastqc"
|
||||||
|
- "--preprocessing_qc_tool falco"
|
||||||
- "--perform_longread_qc false"
|
- "--perform_longread_qc false"
|
||||||
- "--perform_shortread_qc false"
|
- "--perform_shortread_qc false"
|
||||||
- "--shortread_qc_tool fastp"
|
- "--shortread_qc_tool fastp"
|
||||||
|
|
|
@ -62,7 +62,7 @@
|
||||||
|
|
||||||
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||||
|
|
||||||
- [Falco](https://doi.org/10.12688/f1000research.21142.2)
|
- [falco](https://doi.org/10.12688/f1000research.21142.2)
|
||||||
|
|
||||||
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
|
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
|
||||||
|
|
||||||
![](docs/images/taxprofiler_tube.png)
|
![](docs/images/taxprofiler_tube.png)
|
||||||
|
|
||||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`Falco`](https://github.com/smithlabcode/falco) as an alternative option)
|
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
|
||||||
2. Performs optional read pre-processing
|
2. Performs optional read pre-processing
|
||||||
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
|
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
|
||||||
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
|
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
|
||||||
|
|
|
@ -59,6 +59,8 @@ params {
|
||||||
// Databases
|
// Databases
|
||||||
databases = null
|
databases = null
|
||||||
|
|
||||||
|
preprocessing_qc_tool = 'fastqc'
|
||||||
|
|
||||||
// FASTQ preprocessing
|
// FASTQ preprocessing
|
||||||
perform_shortread_qc = false
|
perform_shortread_qc = false
|
||||||
shortread_qc_tool = 'fastp'
|
shortread_qc_tool = 'fastp'
|
||||||
|
|
|
@ -10,7 +10,11 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"fa_icon": "fas fa-terminal",
|
"fa_icon": "fas fa-terminal",
|
||||||
"description": "Define where the pipeline should find input data and save output data.",
|
"description": "Define where the pipeline should find input data and save output data.",
|
||||||
"required": ["input", "outdir", "databases"],
|
"required": [
|
||||||
|
"input",
|
||||||
|
"outdir",
|
||||||
|
"databases"
|
||||||
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"input": {
|
"input": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -80,7 +84,10 @@
|
||||||
"shortread_qc_tool": {
|
"shortread_qc_tool": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "fastp",
|
"default": "fastp",
|
||||||
"enum": ["fastp", "adapterremoval"],
|
"enum": [
|
||||||
|
"fastp",
|
||||||
|
"adapterremoval"
|
||||||
|
],
|
||||||
"fa_icon": "fas fa-tools",
|
"fa_icon": "fas fa-tools",
|
||||||
"description": "Specify which tool to use for short-read QC"
|
"description": "Specify which tool to use for short-read QC"
|
||||||
},
|
},
|
||||||
|
@ -133,7 +140,11 @@
|
||||||
"shortread_complexityfilter_tool": {
|
"shortread_complexityfilter_tool": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "bbduk",
|
"default": "bbduk",
|
||||||
"enum": ["bbduk", "prinseqplusplus", "fastp"],
|
"enum": [
|
||||||
|
"bbduk",
|
||||||
|
"prinseqplusplus",
|
||||||
|
"fastp"
|
||||||
|
],
|
||||||
"fa_icon": "fas fa-hammer",
|
"fa_icon": "fas fa-hammer",
|
||||||
"description": "Specify which tool to use for complexity filtering"
|
"description": "Specify which tool to use for complexity filtering"
|
||||||
},
|
},
|
||||||
|
@ -167,7 +178,10 @@
|
||||||
"shortread_complexityfilter_prinseqplusplus_mode": {
|
"shortread_complexityfilter_prinseqplusplus_mode": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "entropy",
|
"default": "entropy",
|
||||||
"enum": ["entropy", "dust"],
|
"enum": [
|
||||||
|
"entropy",
|
||||||
|
"dust"
|
||||||
|
],
|
||||||
"fa_icon": "fas fa-check-square",
|
"fa_icon": "fas fa-check-square",
|
||||||
"description": "Specify the complexity filter mode for PRINSEQ++"
|
"description": "Specify the complexity filter mode for PRINSEQ++"
|
||||||
},
|
},
|
||||||
|
@ -341,7 +355,15 @@
|
||||||
"diamond_output_format": {
|
"diamond_output_format": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "tsv",
|
"default": "tsv",
|
||||||
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"],
|
"enum": [
|
||||||
|
"blast",
|
||||||
|
"xml",
|
||||||
|
"txt",
|
||||||
|
"daa",
|
||||||
|
"sam",
|
||||||
|
"tsv",
|
||||||
|
"paf"
|
||||||
|
],
|
||||||
"fa_icon": "fas fa-file",
|
"fa_icon": "fas fa-file",
|
||||||
"description": "Specify output format from DIAMOND profiling.",
|
"description": "Specify output format from DIAMOND profiling.",
|
||||||
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
|
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
|
||||||
|
@ -360,7 +382,14 @@
|
||||||
"kaiju_taxon_rank": {
|
"kaiju_taxon_rank": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "species",
|
"default": "species",
|
||||||
"enum": ["phylum", "class", "order", "family", "genus", "species"],
|
"enum": [
|
||||||
|
"phylum",
|
||||||
|
"class",
|
||||||
|
"order",
|
||||||
|
"family",
|
||||||
|
"genus",
|
||||||
|
"species"
|
||||||
|
],
|
||||||
"fa_icon": "fas fa-tag",
|
"fa_icon": "fas fa-tag",
|
||||||
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
|
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
|
||||||
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
|
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
|
||||||
|
@ -555,7 +584,14 @@
|
||||||
"description": "Method used to save pipeline results to output directory.",
|
"description": "Method used to save pipeline results to output directory.",
|
||||||
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
|
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
|
||||||
"fa_icon": "fas fa-copy",
|
"fa_icon": "fas fa-copy",
|
||||||
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
|
"enum": [
|
||||||
|
"symlink",
|
||||||
|
"rellink",
|
||||||
|
"link",
|
||||||
|
"copy",
|
||||||
|
"copyNoFollow",
|
||||||
|
"move"
|
||||||
|
],
|
||||||
"hidden": true
|
"hidden": true
|
||||||
},
|
},
|
||||||
"email_on_fail": {
|
"email_on_fail": {
|
||||||
|
@ -707,5 +743,15 @@
|
||||||
{
|
{
|
||||||
"$ref": "#/definitions/reference_genome_options"
|
"$ref": "#/definitions/reference_genome_options"
|
||||||
}
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"preprocessing_qc_tool": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "fastqc",
|
||||||
|
"enum": [
|
||||||
|
"fastqc",
|
||||||
|
"falco"
|
||||||
]
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,13 +54,13 @@ workflow LONGREAD_PREPROCESSING {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.perform_fastqc_alternative) {
|
if (params.preprocessing_qc_tool == 'fastqc') {
|
||||||
FALCO_PROCESSED ( ch_processed_reads )
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
|
||||||
|
|
||||||
} else {
|
|
||||||
FASTQC_PROCESSED ( ch_processed_reads )
|
FASTQC_PROCESSED ( ch_processed_reads )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||||
|
|
||||||
|
} else if (params.preprocessing_qc_tool == 'falco') {
|
||||||
|
FALCO_PROCESSED ( ch_processed_reads )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||||
}
|
}
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
|
|
|
@ -28,15 +28,14 @@ workflow SHORTREAD_PREPROCESSING {
|
||||||
ch_processed_reads = reads
|
ch_processed_reads = reads
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.perform_fastqc_alternative) {
|
if (params.preprocessing_qc_tool == 'fastqc') {
|
||||||
FALCO_PROCESSED ( ch_processed_reads )
|
|
||||||
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
|
||||||
|
|
||||||
} else {
|
|
||||||
FASTQC_PROCESSED ( ch_processed_reads )
|
FASTQC_PROCESSED ( ch_processed_reads )
|
||||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||||
|
} else if (params.preprocessing_qc_tool == 'falco') {
|
||||||
|
FALCO_PROCESSED ( ch_processed_reads )
|
||||||
|
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||||
}
|
}
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
|
|
|
@ -121,7 +121,7 @@ workflow TAXPROFILER {
|
||||||
*/
|
*/
|
||||||
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
||||||
|
|
||||||
if ( params.perform_fastqc_alternative ) {
|
if ( params.preprocessing_qc_tool == 'falco' ) {
|
||||||
FALCO ( ch_input_for_fastqc )
|
FALCO ( ch_input_for_fastqc )
|
||||||
ch_versions = ch_versions.mix(FALCO.out.versions.first())
|
ch_versions = ch_versions.mix(FALCO.out.versions.first())
|
||||||
} else {
|
} else {
|
||||||
|
@ -257,9 +257,12 @@ workflow TAXPROFILER {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
|
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
|
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
|
||||||
|
|
||||||
if (!params.perform_fastqc_alternative) {
|
if (!params.preprocessing_qc_tool == 'falco') {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
|
||||||
|
}
|
||||||
|
|
||||||
if (params.perform_shortread_qc) {
|
if (params.perform_shortread_qc) {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||||
|
|
Loading…
Reference in a new issue