1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 21:03:09 +00:00

Add Falco as an alternative to FastQC

This commit is contained in:
Sofia Stamouli 2022-10-18 17:43:16 +02:00
parent 4f27998852
commit 8eddb32b88
12 changed files with 217 additions and 22 deletions

View file

@ -23,6 +23,7 @@ jobs:
- "21.10.3"
- "latest-everything"
parameters:
- "--perform_fastqc_alternative false"
- "--perform_longread_qc false"
- "--perform_shortread_qc false"
- "--shortread_qc_tool fastp"

View file

@ -62,6 +62,10 @@
- [FILTLONG](https://github.com/rrwick/Filtlong)
- [Falco](https://doi.org/10.12688/f1000research.21142.2)
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
## Software packaging/containerisation tools
- [Anaconda](https://anaconda.com)

View file

@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
![](docs/images/taxprofiler_tube.png)
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`Falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))

View file

@ -40,6 +40,24 @@ process {
]
}
// Falco QC on the raw (unprocessed) reads.
// Output files are prefixed with the sample id and run accession plus a
// "_raw" suffix, and only the HTML and text reports are published, under
// <outdir>/falco/raw.
withName: FALCO {
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
publishDir = [
path: { "${params.outdir}/falco/raw" },
mode: params.publish_dir_mode,
pattern: '*.{html,txt}'
]
}
// Falco QC on reads after preprocessing (the FALCO module imported under
// the FALCO_PROCESSED alias by the preprocessing subworkflows).
// Output files are prefixed with the sample id and run accession plus a
// "_processed" suffix, and only the HTML and text reports are published,
// under <outdir>/falco/processed.
withName: FALCO_PROCESSED {
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
publishDir = [
path: { "${params.outdir}/falco/processed" },
mode: params.publish_dir_mode,
pattern: '*.{html,txt}'
]
}
withName: FASTP_SINGLE {
ext.args = [
// trimming options

View file

@ -165,7 +165,9 @@ work # Directory containing the nextflow working files
.nextflow_log # Log file from Nextflow
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
```
### Sequencing quality control
nf-core/taxprofiler offers [`Falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/). It is used in place of FastQC when the `--perform_fastqc_alternative` flag is set.
### Preprocessing Steps
nf-core/taxprofiler offers four main preprocessing steps
@ -179,7 +181,7 @@ nf-core/taxprofiler offers four main preprocessing steps
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.

View file

@ -49,6 +49,10 @@
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"falco": {
"branch": "master",
"git_sha": "fc959214036403ad83efe7a41d43d0606c445cda"
},
"fastp": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"

57
modules/nf-core/falco/main.nf generated Normal file
View file

@ -0,0 +1,57 @@
//
// FALCO: run falco (a FastQC emulation) on the reads of one sample.
//
// Input:
//   meta  - Groovy map with sample information; meta.id is used as the task
//           tag and as the default output prefix.
//   reads - one or more FastQ files for the sample.
//
// Output:
//   html     - [ meta, *.html ]  report file(s)
//   txt      - [ meta, *.txt ]   data and summary file(s)
//   versions - versions.yml recording the falco version
//
// The output prefix can be overridden with `task.ext.prefix`, and extra
// command-line options can be passed with `task.ext.args`.
//
process FALCO {
    tag "$meta.id"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::falco=1.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3':
        'quay.io/biocontainers/falco:1.2.1--h867801b_3' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.html"), emit: html
    tuple val(meta), path("*.txt") , emit: txt
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Explicit output names are only passed when there is exactly one input
    // file; with several inputs falco is left to name its outputs itself.
    def report_args = reads.toList().size() == 1
        ? "-D ${prefix}_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html"
        : ''
    """
    falco $args --threads $task.cpus ${reads} $report_args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        falco:\$( falco --version | sed -e "s/falco//g" )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Create the same file names that the single-input script branch writes,
    // and record the version with the same sed expression as the real run so
    // that stub and real runs produce matching versions.yml entries.
    """
    touch ${prefix}_data.txt
    touch ${prefix}_report.html
    touch ${prefix}_summary.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        falco:\$( falco --version | sed -e "s/falco//g" )
    END_VERSIONS
    """
}

52
modules/nf-core/falco/meta.yml generated Normal file
View file

@ -0,0 +1,52 @@
name: falco
description: Run falco on sequenced reads
keywords:
- quality control
- qc
- adapters
- fastq
tools:
- falco:
description: "falco is a drop-in C++ implementation of FastQC to assess the quality of sequence reads."
homepage: "https://falco.readthedocs.io/"
documentation: "https://falco.readthedocs.io/"
tool_dev_url: "None"
doi: ""
licence: "['GPL v3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- html:
type: file
description: FastQC like report
pattern: "*_{fastqc_report.html}"
- txt:
type: file
description: falco report data
pattern: "*_{data.txt}"
- txt:
type: file
description: falco summary file
pattern: "*_{summary.txt}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@lucacozzuto"

View file

@ -10,7 +10,11 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir", "databases"],
"required": [
"input",
"outdir",
"databases"
],
"properties": {
"input": {
"type": "string",
@ -80,7 +84,10 @@
"shortread_qc_tool": {
"type": "string",
"default": "fastp",
"enum": ["fastp", "adapterremoval"],
"enum": [
"fastp",
"adapterremoval"
],
"fa_icon": "fas fa-tools",
"description": "Specify which tool to use for short-read QC"
},
@ -133,7 +140,11 @@
"shortread_complexityfilter_tool": {
"type": "string",
"default": "bbduk",
"enum": ["bbduk", "prinseqplusplus", "fastp"],
"enum": [
"bbduk",
"prinseqplusplus",
"fastp"
],
"fa_icon": "fas fa-hammer",
"description": "Specify which tool to use for complexity filtering"
},
@ -167,7 +178,10 @@
"shortread_complexityfilter_prinseqplusplus_mode": {
"type": "string",
"default": "entropy",
"enum": ["entropy", "dust"],
"enum": [
"entropy",
"dust"
],
"fa_icon": "fas fa-check-square",
"description": "Specify the complexity filter mode for PRINSEQ++"
},
@ -341,7 +355,15 @@
"diamond_output_format": {
"type": "string",
"default": "tsv",
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"],
"enum": [
"blast",
"xml",
"txt",
"daa",
"sam",
"tsv",
"paf"
],
"fa_icon": "fas fa-file",
"description": "Specify output format from DIAMOND profiling.",
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overridden by `--diamond_save_reads`.\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
@ -360,7 +382,14 @@
"kaiju_taxon_rank": {
"type": "string",
"default": "species",
"enum": ["phylum", "class", "order", "family", "genus", "species"],
"enum": [
"phylum",
"class",
"order",
"family",
"genus",
"species"
],
"fa_icon": "fas fa-tag",
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
@ -555,7 +584,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true
},
"email_on_fail": {

View file

@ -3,6 +3,8 @@
//
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
include { PORECHOP } from '../../modules/nf-core/porechop/main'
include { FILTLONG } from '../../modules/nf-core/filtlong/main'
@ -52,8 +54,14 @@ workflow LONGREAD_PREPROCESSING {
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
}
FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
if (params.perform_fastqc_alternative) {
FALCO_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} else {
FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
}
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -5,7 +5,8 @@
include { SHORTREAD_FASTP } from './shortread_fastp'
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
workflow SHORTREAD_PREPROCESSING {
take:
@ -27,9 +28,16 @@ workflow SHORTREAD_PREPROCESSING {
ch_processed_reads = reads
}
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
if (params.perform_fastqc_alternative) {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
} else {
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
}
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -84,6 +84,7 @@ include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardis
// MODULE: Installed directly from nf-core/modules
//
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { FALCO } from '../modules/nf-core/falco/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
@ -120,12 +121,13 @@ workflow TAXPROFILER {
*/
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
FASTQC (
ch_input_for_fastqc
)
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
if ( params.perform_fastqc_alternative ) {
FALCO ( ch_input_for_fastqc )
ch_versions = ch_versions.mix(FALCO.out.versions.first())
} else {
FASTQC ( ch_input_for_fastqc )
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
}
/*
SUBWORKFLOW: PERFORM PREPROCESSING
*/
@ -254,7 +256,10 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
if (!params.perform_fastqc_alternative) {
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
}
if (params.perform_shortread_qc) {
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )