1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 03:12:04 +00:00

Add parameter for turning on longread host removal

This commit is contained in:
ljmesi 2022-05-05 13:19:10 +02:00
parent d94534e8ac
commit 557d31dfd2
4 changed files with 6 additions and 4 deletions

View file

@ -28,7 +28,7 @@ params {
perform_longread_clip = false perform_longread_clip = false
perform_shortread_complexityfilter = true perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true perform_shortread_hostremoval = true
shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' perform_longread_hostremoval = true
run_kaiju = true run_kaiju = true
run_kraken2 = true run_kraken2 = true
run_malt = true run_malt = true

View file

@ -191,7 +191,7 @@ You can optionally save the FASTQ output of the run merging with the `--save_com
#### Host Removal #### Host Removal
Removal of possible-host reads from FASTQ files prior to profiling can be activated with `--perform_shortread_hostremoval` Removal of possible-host reads from FASTQ files prior to profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`.
Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these reads with more efficient methods prior to the typically resource-heavy profiling. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases. Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these reads with more efficient methods prior to the typically resource-heavy profiling. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases.

View file

@ -82,7 +82,7 @@ params {
// Host Removal // Host Removal
perform_shortread_hostremoval = false perform_shortread_hostremoval = false
shortread_hostremoval_reference = null perform_longread_hostremoval = false
shortread_hostremoval_index = null shortread_hostremoval_index = null
longread_hostremoval_index = null longread_hostremoval_index = null
save_hostremoval_index = false save_hostremoval_index = false

View file

@ -362,7 +362,9 @@
"perform_shortread_hostremoval": { "perform_shortread_hostremoval": {
"type": "boolean" "type": "boolean"
}, },
"shortread_hostremoval_reference": { "perform_longread_hostremoval": {
"type": "boolean"
},
"type": "string", "type": "string",
"default": "None" "default": "None"
}, },