mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 04:29:55 +00:00
Add Falco as an alternative to FastQC
This commit is contained in:
parent
4f27998852
commit
8eddb32b88
12 changed files with 217 additions and 22 deletions
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
|
@ -23,6 +23,7 @@ jobs:
|
|||
- "21.10.3"
|
||||
- "latest-everything"
|
||||
parameters:
|
||||
- "--perform_fastqc_alternative false"
|
||||
- "--perform_longread_qc false"
|
||||
- "--perform_shortread_qc false"
|
||||
- "--shortread_qc_tool fastp"
|
||||
|
|
|
@ -62,6 +62,10 @@
|
|||
|
||||
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||
|
||||
- [Falco](https://doi.org/10.12688/f1000research.21142.2)
|
||||
|
||||
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
- [Anaconda](https://anaconda.com)
|
||||
|
|
|
@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
|
|||
|
||||
![](docs/images/taxprofiler_tube.png)
|
||||
|
||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
|
||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`Falco`](https://github.com/smithlabcode/falco) as an alternative option)
|
||||
2. Performs optional read pre-processing
|
||||
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
|
||||
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
|
||||
|
|
|
@ -40,6 +40,24 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: FALCO {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/falco/raw" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{html,txt}'
|
||||
]
|
||||
}
|
||||
|
||||
withName: FALCO_PROCESSED {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/falco/processed" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{html,txt}'
|
||||
]
|
||||
}
|
||||
|
||||
withName: FASTP_SINGLE {
|
||||
ext.args = [
|
||||
// trimming options
|
||||
|
|
|
@ -165,7 +165,9 @@ work # Directory containing the nextflow working files
|
|||
.nextflow_log # Log file from Nextflow
|
||||
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
|
||||
```
|
||||
### Sequencing quality control
|
||||
|
||||
nf-core/taxprofiler offers [`Falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/); it is enabled with the `--perform_fastqc_alternative` flag.
|
||||
### Preprocessing Steps
|
||||
|
||||
nf-core/taxprofiler offers four main preprocessing steps
|
||||
|
@ -179,7 +181,7 @@ nf-core/taxprofiler offers four main preprocessing steps
|
|||
|
||||
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
|
||||
|
||||
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
||||
It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
||||
|
||||
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
|
||||
|
||||
|
|
|
@ -49,6 +49,10 @@
|
|||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
},
|
||||
"falco": {
|
||||
"branch": "master",
|
||||
"git_sha": "fc959214036403ad83efe7a41d43d0606c445cda"
|
||||
},
|
||||
"fastp": {
|
||||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
|
|
57
modules/nf-core/falco/main.nf
generated
Normal file
57
modules/nf-core/falco/main.nf
generated
Normal file
|
@ -0,0 +1,57 @@
|
|||
// Runs falco (a FastQC emulation) on the reads of one sample and collects the
// HTML report, the text outputs and the tool version.
//
// Input:
//   meta  - sample information map; `meta.id` is used for the task tag and as
//           the default output prefix
//   reads - one or more read files for the sample
// Output:
//   html     - [ meta, *.html ]
//   txt      - [ meta, *.txt ]
//   versions - versions.yml recording the falco version
process FALCO {
    tag "$meta.id"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::falco=1.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3':
        'quay.io/biocontainers/falco:1.2.1--h867801b_3' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.html"), emit: html
    tuple val(meta), path("*.txt") , emit: txt
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Explicit, prefixed output names are only passed when there is exactly one
    // input file; with several inputs falco is left to choose its own names.
    // NOTE(review): in the multi-file case `prefix` is therefore not applied —
    // confirm the default names are unique enough for the publishDir in use.
    def named_outputs = reads.toList().size() == 1
        ? "-D ${prefix}_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html"
        : ''
    """
    falco $args --threads $task.cpus ${reads} $named_outputs

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        falco:\$( falco --version | sed -e "s/falco//g" )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Create the same file names as the single-file branch of the real script
    // and record the version with the same sed expression, so stub runs and
    // real runs stay interchangeable for downstream steps.
    """
    touch ${prefix}_data.txt
    touch ${prefix}_report.html
    touch ${prefix}_summary.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        falco:\$( falco --version | sed -e "s/falco//g" )
    END_VERSIONS
    """
}
|
52
modules/nf-core/falco/meta.yml
generated
Normal file
52
modules/nf-core/falco/meta.yml
generated
Normal file
|
@ -0,0 +1,52 @@
|
|||
name: falco
|
||||
description: Run falco on sequenced reads
|
||||
keywords:
|
||||
- quality control
|
||||
- qc
|
||||
- adapters
|
||||
- fastq
|
||||
tools:
|
||||
- falco:
|
||||
description: "falco is a drop-in C++ implementation of FastQC to assess the quality of sequence reads."
|
||||
|
||||
homepage: "https://falco.readthedocs.io/"
|
||||
documentation: "https://falco.readthedocs.io/"
|
||||
tool_dev_url: "None"
|
||||
doi: ""
|
||||
licence: "['GPL v3']"
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- html:
|
||||
type: file
|
||||
description: FastQC like report
|
||||
pattern: "*_{fastqc_report.html}"
|
||||
- txt:
|
||||
type: file
|
||||
description: falco report data
|
||||
pattern: "*_{data.txt}"
|
||||
- txt:
|
||||
type: file
|
||||
description: falco summary file
|
||||
pattern: "*_{summary.txt}"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
authors:
|
||||
- "@lucacozzuto"
|
|
@ -10,7 +10,11 @@
|
|||
"type": "object",
|
||||
"fa_icon": "fas fa-terminal",
|
||||
"description": "Define where the pipeline should find input data and save output data.",
|
||||
"required": ["input", "outdir", "databases"],
|
||||
"required": [
|
||||
"input",
|
||||
"outdir",
|
||||
"databases"
|
||||
],
|
||||
"properties": {
|
||||
"input": {
|
||||
"type": "string",
|
||||
|
@ -80,7 +84,10 @@
|
|||
"shortread_qc_tool": {
|
||||
"type": "string",
|
||||
"default": "fastp",
|
||||
"enum": ["fastp", "adapterremoval"],
|
||||
"enum": [
|
||||
"fastp",
|
||||
"adapterremoval"
|
||||
],
|
||||
"fa_icon": "fas fa-tools",
|
||||
"description": "Specify which tool to use for short-read QC"
|
||||
},
|
||||
|
@ -133,7 +140,11 @@
|
|||
"shortread_complexityfilter_tool": {
|
||||
"type": "string",
|
||||
"default": "bbduk",
|
||||
"enum": ["bbduk", "prinseqplusplus", "fastp"],
|
||||
"enum": [
|
||||
"bbduk",
|
||||
"prinseqplusplus",
|
||||
"fastp"
|
||||
],
|
||||
"fa_icon": "fas fa-hammer",
|
||||
"description": "Specify which tool to use for complexity filtering"
|
||||
},
|
||||
|
@ -167,7 +178,10 @@
|
|||
"shortread_complexityfilter_prinseqplusplus_mode": {
|
||||
"type": "string",
|
||||
"default": "entropy",
|
||||
"enum": ["entropy", "dust"],
|
||||
"enum": [
|
||||
"entropy",
|
||||
"dust"
|
||||
],
|
||||
"fa_icon": "fas fa-check-square",
|
||||
"description": "Specify the complexity filter mode for PRINSEQ++"
|
||||
},
|
||||
|
@ -341,7 +355,15 @@
|
|||
"diamond_output_format": {
|
||||
"type": "string",
|
||||
"default": "tsv",
|
||||
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"],
|
||||
"enum": [
|
||||
"blast",
|
||||
"xml",
|
||||
"txt",
|
||||
"daa",
|
||||
"sam",
|
||||
"tsv",
|
||||
"paf"
|
||||
],
|
||||
"fa_icon": "fas fa-file",
|
||||
"description": "Specify output format from DIAMOND profiling.",
|
||||
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
|
||||
|
@ -360,7 +382,14 @@
|
|||
"kaiju_taxon_rank": {
|
||||
"type": "string",
|
||||
"default": "species",
|
||||
"enum": ["phylum", "class", "order", "family", "genus", "species"],
|
||||
"enum": [
|
||||
"phylum",
|
||||
"class",
|
||||
"order",
|
||||
"family",
|
||||
"genus",
|
||||
"species"
|
||||
],
|
||||
"fa_icon": "fas fa-tag",
|
||||
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
|
||||
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
|
||||
|
@ -555,7 +584,14 @@
|
|||
"description": "Method used to save pipeline results to output directory.",
|
||||
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
|
||||
"fa_icon": "fas fa-copy",
|
||||
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
|
||||
"enum": [
|
||||
"symlink",
|
||||
"rellink",
|
||||
"link",
|
||||
"copy",
|
||||
"copyNoFollow",
|
||||
"move"
|
||||
],
|
||||
"hidden": true
|
||||
},
|
||||
"email_on_fail": {
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
//
|
||||
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
|
||||
|
||||
include { PORECHOP } from '../../modules/nf-core/porechop/main'
|
||||
include { FILTLONG } from '../../modules/nf-core/filtlong/main'
|
||||
|
||||
|
@ -52,8 +54,14 @@ workflow LONGREAD_PREPROCESSING {
|
|||
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
||||
}
|
||||
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
if (params.perform_fastqc_alternative) {
|
||||
FALCO_PROCESSED ( ch_processed_reads )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||
|
||||
} else {
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
}
|
||||
|
||||
emit:
|
||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
include { SHORTREAD_FASTP } from './shortread_fastp'
|
||||
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
|
||||
|
||||
workflow SHORTREAD_PREPROCESSING {
|
||||
take:
|
||||
|
@ -27,9 +28,16 @@ workflow SHORTREAD_PREPROCESSING {
|
|||
ch_processed_reads = reads
|
||||
}
|
||||
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
if (params.perform_fastqc_alternative) {
|
||||
FALCO_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||
|
||||
} else {
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
}
|
||||
|
||||
emit:
|
||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||
|
|
|
@ -84,6 +84,7 @@ include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardis
|
|||
// MODULE: Installed directly from nf-core/modules
|
||||
//
|
||||
include { FASTQC } from '../modules/nf-core/fastqc/main'
|
||||
include { FALCO } from '../modules/nf-core/falco/main'
|
||||
include { MULTIQC } from '../modules/nf-core/multiqc/main'
|
||||
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
|
||||
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
|
||||
|
@ -120,12 +121,13 @@ workflow TAXPROFILER {
|
|||
*/
|
||||
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
||||
|
||||
FASTQC (
|
||||
ch_input_for_fastqc
|
||||
)
|
||||
|
||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||
|
||||
if ( params.perform_fastqc_alternative ) {
|
||||
FALCO ( ch_input_for_fastqc )
|
||||
ch_versions = ch_versions.mix(FALCO.out.versions.first())
|
||||
} else {
|
||||
FASTQC ( ch_input_for_fastqc )
|
||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||
}
|
||||
/*
|
||||
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||
*/
|
||||
|
@ -254,7 +256,10 @@ workflow TAXPROFILER {
|
|||
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
|
||||
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
|
||||
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||
|
||||
if (!params.perform_fastqc_alternative) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||
}
|
||||
|
||||
if (params.perform_shortread_qc) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||
|
|
Loading…
Reference in a new issue