mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 10:09:55 +00:00
Merge pull request #253 from nf-core/futher-further-prerelease-updates
Futher further prerelease updates: FALCO display in MultiQC
This commit is contained in:
commit
d647e61a2f
5 changed files with 64 additions and 97 deletions
|
@ -39,6 +39,10 @@ sp:
|
||||||
diamond:
|
diamond:
|
||||||
contents: "diamond v"
|
contents: "diamond v"
|
||||||
num_lines: 10
|
num_lines: 10
|
||||||
|
fastqc/data:
|
||||||
|
fn_re: ".*(fastqc|falco)_data.txt$"
|
||||||
|
fastqc/zip:
|
||||||
|
fn: "*_fastqc.zip"
|
||||||
|
|
||||||
#extra_fn_clean_exts:
|
#extra_fn_clean_exts:
|
||||||
# - '_fastp'
|
# - '_fastp'
|
||||||
|
@ -47,25 +51,23 @@ sp:
|
||||||
|
|
||||||
top_modules:
|
top_modules:
|
||||||
- "fastqc":
|
- "fastqc":
|
||||||
name: "FastQC (pre-Trimming)"
|
name: "FastQC / Falco (pre-Trimming)"
|
||||||
path_filters:
|
path_filters:
|
||||||
- "*raw_*fastqc.zip"
|
- "*raw*"
|
||||||
|
path_filters_exclude:
|
||||||
|
- "*processed*"
|
||||||
|
extra: "If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
|
||||||
- "fastqc":
|
- "fastqc":
|
||||||
name: "Falco (pre-Trimming)"
|
name: "FastQC / Falco (post-Trimming)"
|
||||||
path_filters:
|
path_filters:
|
||||||
- "*_raw_falco_*_report.html"
|
- "*processed*"
|
||||||
|
path_filters_exclude:
|
||||||
|
- "*raw*"
|
||||||
|
extra: "If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
|
||||||
- "fastp"
|
- "fastp"
|
||||||
- "adapterRemoval"
|
- "adapterRemoval"
|
||||||
- "porechop":
|
- "porechop":
|
||||||
extra: "ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop did not detect any adapters and therefore no statistics generated."
|
extra: "ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop did not detect any adapters and therefore no statistics generated."
|
||||||
- "fastqc":
|
|
||||||
name: "FastQC (post-Trimming)"
|
|
||||||
path_filters:
|
|
||||||
- "*_processed_*fastqc.zip"
|
|
||||||
- "fastqc":
|
|
||||||
name: "Falco (post-Trimming)"
|
|
||||||
path_filters:
|
|
||||||
- "*_processed_falco_*_report.html"
|
|
||||||
- "bbduk"
|
- "bbduk"
|
||||||
- "prinseqplusplus"
|
- "prinseqplusplus"
|
||||||
- "filtlong"
|
- "filtlong"
|
||||||
|
@ -105,19 +107,20 @@ top_modules:
|
||||||
#It is not possible to set placement for custom kraken and centrifuge columns.
|
#It is not possible to set placement for custom kraken and centrifuge columns.
|
||||||
|
|
||||||
table_columns_placement:
|
table_columns_placement:
|
||||||
FastQC (pre-Trimming):
|
FastQC / Falco (pre-Trimming):
|
||||||
total_sequences: 100
|
total_sequences: 100
|
||||||
avg_sequence_length: 110
|
avg_sequence_length: 110
|
||||||
median_sequence_length: 120
|
median_sequence_length: 120
|
||||||
percent_duplicates: 130
|
percent_duplicates: 130
|
||||||
percent_gc: 140
|
percent_gc: 140
|
||||||
percent_fails: 150
|
percent_fails: 150
|
||||||
Falco (pre-Trimming):
|
FastQC / Falco (post-Trimming):
|
||||||
total_sequences: 200
|
total_sequences: 200
|
||||||
avg_sequence_length: 210
|
avg_sequence_length: 210
|
||||||
percent_duplicates: 220
|
median_sequence_length: 220
|
||||||
percent_gc: 230
|
percent_duplicates: 230
|
||||||
percent_fails: 240
|
percent_gc: 240
|
||||||
|
percent_fails: 250
|
||||||
fastp:
|
fastp:
|
||||||
pct_adapter: 300
|
pct_adapter: 300
|
||||||
pct_surviving: 310
|
pct_surviving: 310
|
||||||
|
@ -141,19 +144,6 @@ table_columns_placement:
|
||||||
Middle Split Percent: 460
|
Middle Split Percent: 460
|
||||||
Filtlong:
|
Filtlong:
|
||||||
Target bases: 500
|
Target bases: 500
|
||||||
FastQC (post-Trimming):
|
|
||||||
total_sequences: 600
|
|
||||||
avg_sequence_length: 610
|
|
||||||
median_sequence_length: 620
|
|
||||||
percent_duplicates: 630
|
|
||||||
percent_gc: 640
|
|
||||||
percent_fails: 650
|
|
||||||
Falco (post-Trimming):
|
|
||||||
total_sequences: 700
|
|
||||||
avg_sequence_length: 710
|
|
||||||
percent_duplicates: 720
|
|
||||||
percent_gc: 730
|
|
||||||
percent_fails: 740
|
|
||||||
BBDuk:
|
BBDuk:
|
||||||
Input reads: 800
|
Input reads: 800
|
||||||
Total Removed bases percent: 810
|
Total Removed bases percent: 810
|
||||||
|
@ -205,25 +195,18 @@ table_columns_placement:
|
||||||
"Number of ext-mOTUs": 1880
|
"Number of ext-mOTUs": 1880
|
||||||
|
|
||||||
table_columns_visible:
|
table_columns_visible:
|
||||||
FastQC (pre-Trimming):
|
FastQC / Falco (pre-Trimming):
|
||||||
total_sequences: True
|
total_sequences: True
|
||||||
avg_sequence_length: True
|
avg_sequence_length: True
|
||||||
percent_duplicates: True
|
percent_duplicates: True
|
||||||
percent_gc: True
|
percent_gc: True
|
||||||
percent_fails: False
|
percent_fails: False
|
||||||
Falco (pre-Trimming):
|
FastQC / Falco (post-Trimming):
|
||||||
total_sequences: True
|
total_sequences: True
|
||||||
avg_sequence_length: True
|
avg_sequence_length: True
|
||||||
percent_duplicates: True
|
percent_duplicates: False
|
||||||
percent_gc: True
|
percent_gc: False
|
||||||
percent_fails: False
|
percent_fails: False
|
||||||
fastp:
|
|
||||||
pct_adapter: True
|
|
||||||
pct_surviving: True
|
|
||||||
pct_duplication: False
|
|
||||||
after_filtering_gc_content: False
|
|
||||||
after_filtering_q30_rate: False
|
|
||||||
after_filtering_q30_bases: False
|
|
||||||
porechop:
|
porechop:
|
||||||
Input reads: False
|
Input reads: False
|
||||||
Start Trimmed:
|
Start Trimmed:
|
||||||
|
@ -232,6 +215,13 @@ table_columns_visible:
|
||||||
End Trimmed Percent: True
|
End Trimmed Percent: True
|
||||||
Middle Split: False
|
Middle Split: False
|
||||||
Middle Split Percent: True
|
Middle Split Percent: True
|
||||||
|
fastp:
|
||||||
|
pct_adapter: True
|
||||||
|
pct_surviving: True
|
||||||
|
pct_duplication: False
|
||||||
|
after_filtering_gc_content: False
|
||||||
|
after_filtering_q30_rate: False
|
||||||
|
after_filtering_q30_bases: False
|
||||||
Filtlong:
|
Filtlong:
|
||||||
Target bases: True
|
Target bases: True
|
||||||
Adapter Removal:
|
Adapter Removal:
|
||||||
|
@ -239,18 +229,6 @@ table_columns_visible:
|
||||||
percent_aligned: True
|
percent_aligned: True
|
||||||
percent_collapsed: True
|
percent_collapsed: True
|
||||||
percent_discarded: False
|
percent_discarded: False
|
||||||
FastQC (post-Trimming):
|
|
||||||
total_sequences: True
|
|
||||||
avg_sequence_length: True
|
|
||||||
percent_duplicates: False
|
|
||||||
percent_gc: False
|
|
||||||
percent_fails: False
|
|
||||||
Falco (post-Trimming):
|
|
||||||
total_sequences: True
|
|
||||||
avg_sequence_length: True
|
|
||||||
percent_duplicates: False
|
|
||||||
percent_gc: False
|
|
||||||
percent_fails: False
|
|
||||||
BBDuk:
|
BBDuk:
|
||||||
Input reads: False
|
Input reads: False
|
||||||
Total Removed bases Percent: False
|
Total Removed bases Percent: False
|
||||||
|
@ -278,25 +256,13 @@ table_columns_visible:
|
||||||
motus: False
|
motus: False
|
||||||
|
|
||||||
table_columns_name:
|
table_columns_name:
|
||||||
FastQC (pre-Trimming):
|
FastQC / Falco (pre-Trimming):
|
||||||
total_sequences: "Nr. Input Reads"
|
total_sequences: "Nr. Input Reads"
|
||||||
avg_sequence_length: "Length Input Reads"
|
avg_sequence_length: "Length Input Reads"
|
||||||
percent_gc: "% GC Input Reads"
|
percent_gc: "% GC Input Reads"
|
||||||
percent_duplicates: "% Dups Input Reads"
|
percent_duplicates: "% Dups Input Reads"
|
||||||
percent_fails: "% Failed Input Reads"
|
percent_fails: "% Failed Input Reads"
|
||||||
Falco (pre-Trimming):
|
FastQC / Falco (post-Trimming):
|
||||||
total_sequences: "Nr. Input Reads"
|
|
||||||
avg_sequence_length: "Length Input Reads"
|
|
||||||
percent_gc: "% GC Input Reads"
|
|
||||||
percent_duplicates: "% Dups Input Reads"
|
|
||||||
percent_fails: "% Failed Input Reads"
|
|
||||||
FastQC (post-Trimming):
|
|
||||||
total_sequences: "Nr. Processed Reads"
|
|
||||||
avg_sequence_length: "Length Processed Reads"
|
|
||||||
percent_gc: "% GC Processed Reads"
|
|
||||||
percent_duplicates: "% Dups Processed Reads"
|
|
||||||
percent_fails: "% Failed Processed Reads"
|
|
||||||
Falco (post-Trimming):
|
|
||||||
total_sequences: "Nr. Processed Reads"
|
total_sequences: "Nr. Processed Reads"
|
||||||
avg_sequence_length: "Length Processed Reads"
|
avg_sequence_length: "Length Processed Reads"
|
||||||
percent_gc: "% GC Processed Reads"
|
percent_gc: "% GC Processed Reads"
|
||||||
|
@ -314,7 +280,8 @@ extra_fn_clean_exts:
|
||||||
- ".bbduk"
|
- ".bbduk"
|
||||||
- ".unmapped"
|
- ".unmapped"
|
||||||
- "_filtered"
|
- "_filtered"
|
||||||
- "_processed"
|
- type: remove
|
||||||
|
pattern: "_falco"
|
||||||
|
|
||||||
section_comments:
|
section_comments:
|
||||||
general_stats: "By default, all read count columns are displayed as millions (M) of reads."
|
general_stats: "By default, all read count columns are displayed as millions (M) of reads."
|
||||||
|
|
|
@ -486,7 +486,7 @@ process {
|
||||||
ext.args = { "${meta.db_params}" }
|
ext.args = { "${meta.db_params}" }
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: '.*PROFILING:KAIJU_KAIJU2TABLE' {
|
withName: 'KAIJU_KAIJU2TABLE_SINGLE' {
|
||||||
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" }
|
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/kaiju/${meta.db_name}/" },
|
path: { "${params.outdir}/kaiju/${meta.db_name}/" },
|
||||||
|
@ -495,7 +495,7 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: '.*STANDARDISATION_PROFILES:KAIJU_KAIJU2TABLE' {
|
withName: 'KAIJU_KAIJU2TABLE_COMBINED' {
|
||||||
ext.prefix = { "kaiju_${meta.id}_combined_reports" }
|
ext.prefix = { "kaiju_${meta.id}_combined_reports" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/kaiju/" },
|
path: { "${params.outdir}/kaiju/" },
|
||||||
|
|
|
@ -35,19 +35,21 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
|
||||||
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
|
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
|
||||||
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
|
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
|
||||||
|
|
||||||
### FastQC or falco
|
### FastQC or Falco
|
||||||
|
|
||||||
<details markdown="1">
|
<details markdown="1">
|
||||||
<summary>Output files</summary>
|
<summary>Output files</summary>
|
||||||
|
|
||||||
- `fastqc/`
|
- `fastqc/`
|
||||||
- `*_fastqc.html`: FastQC report containing quality metrics.
|
- `*_fastqc.html`: FastQC or Falco report containing quality metrics.
|
||||||
- `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
|
- `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images (FastQC only).
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
|
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
|
||||||
|
|
||||||
|
If preprocessing is turned on, nf-core/taxprofiler runs FastQC/Falco twice - once before and once after adapter removal/read merging - to allow evaluation of the performance of these preprocessing steps. Note that in the General Stats table, the columns of these two instances of FastQC/Falco are placed next to each other to make them easier to compare. However, the columns of the actual preprocessing steps (i.e., fastp, AdapterRemoval, and Porechop) will be displayed _after_ the two FastQC/Falco columns, even if they were run 'between' the two FastQC/Falco jobs in the pipeline itself.
|
||||||
|
|
||||||
> ℹ️ Falco produces identical output to FastQC but in the `falco/` directory.
|
> ℹ️ Falco produces identical output to FastQC but in the `falco/` directory.
|
||||||
|
|
||||||
![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
|
![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
|
||||||
|
@ -56,8 +58,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
|
||||||
|
|
||||||
![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
|
![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
|
||||||
|
|
||||||
> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
|
|
||||||
|
|
||||||
### fastp
|
### fastp
|
||||||
|
|
||||||
[fastp](https://github.com/OpenGene/fastp) is a FASTQ pre-processing tool for quality control, trimming of adapters, quality filtering and other features.
|
[fastp](https://github.com/OpenGene/fastp) is a FASTQ pre-processing tool for quality control, trimming of adapters, quality filtering and other features.
|
||||||
|
|
|
@ -11,7 +11,7 @@ include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/ce
|
||||||
include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main'
|
include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main'
|
||||||
include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main'
|
include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main'
|
||||||
include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main'
|
include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main'
|
||||||
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_SINGLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
||||||
include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main'
|
include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main'
|
||||||
include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main'
|
include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main'
|
||||||
include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
|
include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
|
||||||
|
@ -272,10 +272,10 @@ workflow PROFILING {
|
||||||
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
|
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
|
||||||
ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
|
ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
|
||||||
|
|
||||||
KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
|
KAIJU_KAIJU2TABLE_SINGLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
|
||||||
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
|
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_SINGLE.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
|
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary )
|
||||||
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
|
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary )
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.run_diamond ) {
|
if ( params.run_diamond ) {
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
//
|
//
|
||||||
|
|
||||||
include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
|
include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
|
||||||
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_COMBINED } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
||||||
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main'
|
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main'
|
||||||
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
|
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
|
||||||
include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
|
include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
|
||||||
|
@ -103,9 +103,9 @@ workflow STANDARDISATION_PROFILES {
|
||||||
[[id:it[0]], it[1]]
|
[[id:it[0]], it[1]]
|
||||||
}
|
}
|
||||||
|
|
||||||
KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
|
KAIJU_KAIJU2TABLE_COMBINED ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
|
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_COMBINED.out.summary )
|
||||||
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
|
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_COMBINED.out.versions )
|
||||||
|
|
||||||
// Kraken2
|
// Kraken2
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue