1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-14 17:33:09 +00:00

Merge pull request #148 from genomic-medicine-sweden/add_falco

Add Falco as an alternative to FastQC
This commit is contained in:
Sofia Stamouli 2022-10-25 12:40:26 +02:00 committed by GitHub
commit 63c260bfbc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 192 additions and 16 deletions

View file

@ -23,6 +23,7 @@ jobs:
- "21.10.3" - "21.10.3"
- "latest-everything" - "latest-everything"
parameters: parameters:
- "--preprocessing_qc_tool falco"
- "--perform_longread_qc false" - "--perform_longread_qc false"
- "--perform_shortread_qc false" - "--perform_shortread_qc false"
- "--shortread_qc_tool fastp" - "--shortread_qc_tool fastp"

View file

@ -62,6 +62,10 @@
- [FILTLONG](https://github.com/rrwick/Filtlong) - [FILTLONG](https://github.com/rrwick/Filtlong)
- [falco](https://doi.org/10.12688/f1000research.21142.2)
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
## Software packaging/containerisation tools ## Software packaging/containerisation tools
- [Anaconda](https://anaconda.com) - [Anaconda](https://anaconda.com)

View file

@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
![](docs/images/taxprofiler_tube.png) ![](docs/images/taxprofiler_tube.png)
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing 2. Performs optional read pre-processing
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop)) - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong)) - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))

View file

@ -40,6 +40,24 @@ process {
] ]
} }
withName: FALCO {
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
publishDir = [
path: { "${params.outdir}/falco/raw" },
mode: params.publish_dir_mode,
pattern: '*.{html,txt}'
]
}
withName: FALCO_PROCESSED {
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
publishDir = [
path: { "${params.outdir}/falco/processed" },
mode: params.publish_dir_mode,
pattern: '*.{html,txt}'
]
}
withName: FASTP_SINGLE { withName: FASTP_SINGLE {
ext.args = [ ext.args = [
// trimming options // trimming options

View file

@ -166,6 +166,10 @@ work # Directory containing the nextflow working files
# Other nextflow hidden files, eg. history of pipeline runs and old logs. # Other nextflow hidden files, eg. history of pipeline runs and old logs.
``` ```
### Sequencing quality control
nf-core taxprofiler offers [`falco`](https://github.com/smithlabcode/falco] as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
### Preprocessing Steps ### Preprocessing Steps
nf-core/taxprofiler offers four main preprocessing steps nf-core/taxprofiler offers four main preprocessing steps
@ -179,7 +183,7 @@ nf-core/taxprofiler offers four main preprocessing steps
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags. Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles. It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`. There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.

View file

@ -49,6 +49,10 @@
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
}, },
"falco": {
"branch": "master",
"git_sha": "fc959214036403ad83efe7a41d43d0606c445cda"
},
"fastp": { "fastp": {
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"

57
modules/nf-core/falco/main.nf generated Normal file
View file

@ -0,0 +1,57 @@
process FALCO {
tag "$meta.id"
label 'process_single'
conda (params.enable_conda ? "bioconda::falco=1.2.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3':
'quay.io/biocontainers/falco:1.2.1--h867801b_3' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path("*.html"), emit: html
tuple val(meta), path("*.txt") , emit: txt
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
if ( reads.toList().size() == 1 ) {
"""
falco $args --threads $task.cpus ${reads} -D ${prefix}_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html
cat <<-END_VERSIONS > versions.yml
"${task.process}":
falco:\$( falco --version | sed -e "s/falco//g" )
END_VERSIONS
"""
} else {
"""
falco $args --threads $task.cpus ${reads}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
falco:\$( falco --version | sed -e "s/falco//g" )
END_VERSIONS
"""
}
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}_data.txt
touch ${prefix}_fastqc_data.html
touch ${prefix}_summary.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
falco: \$( falco --version | sed -e "s/falco v//g" )
END_VERSIONS
"""
}

52
modules/nf-core/falco/meta.yml generated Normal file
View file

@ -0,0 +1,52 @@
name: falco
description: Run falco on sequenced reads
keywords:
- quality control
- qc
- adapters
- fastq
tools:
- fastqc:
description: "falco is a drop-in C++ implementation of FastQC to assess the quality of sequence reads."
homepage: "https://falco.readthedocs.io/"
documentation: "https://falco.readthedocs.io/"
tool_dev_url: "None"
doi: ""
licence: "['GPL v3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- html:
type: file
description: FastQC like report
pattern: "*_{fastqc_report.html}"
- txt:
type: file
description: falco report data
pattern: "*_{data.txt}"
- txt:
type: file
description: falco summary file
pattern: "*_{summary.txt}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@lucacozzuto"

View file

@ -59,6 +59,8 @@ params {
// Databases // Databases
databases = null databases = null
preprocessing_qc_tool = 'fastqc'
// FASTQ preprocessing // FASTQ preprocessing
perform_shortread_qc = false perform_shortread_qc = false
shortread_qc_tool = 'fastp' shortread_qc_tool = 'fastp'

View file

@ -707,5 +707,14 @@
{ {
"$ref": "#/definitions/reference_genome_options" "$ref": "#/definitions/reference_genome_options"
} }
] ],
"properties": {
"preprocessing_qc_tool": {
"type": "string",
"default": "fastqc",
"enum": ["fastqc", "falco"],
"help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.",
"description": "Specify the tool used for quality control of raw sequencing reads"
}
}
} }

View file

@ -3,6 +3,8 @@
// //
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main' include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
include { PORECHOP } from '../../modules/nf-core/porechop/main' include { PORECHOP } from '../../modules/nf-core/porechop/main'
include { FILTLONG } from '../../modules/nf-core/filtlong/main' include { FILTLONG } from '../../modules/nf-core/filtlong/main'
@ -52,8 +54,16 @@ workflow LONGREAD_PREPROCESSING {
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
} }
FASTQC_PROCESSED ( ch_processed_reads ) if (params.preprocessing_qc_tool == 'fastqc') {
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
}
emit: emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -5,7 +5,8 @@
include { SHORTREAD_FASTP } from './shortread_fastp' include { SHORTREAD_FASTP } from './shortread_fastp'
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval' include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main' include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
workflow SHORTREAD_PREPROCESSING { workflow SHORTREAD_PREPROCESSING {
take: take:
@ -27,9 +28,15 @@ workflow SHORTREAD_PREPROCESSING {
ch_processed_reads = reads ch_processed_reads = reads
} }
FASTQC_PROCESSED ( ch_processed_reads ) if (params.preprocessing_qc_tool == 'fastqc') {
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions ) FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
}
emit: emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -84,6 +84,7 @@ include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardis
// MODULE: Installed directly from nf-core/modules // MODULE: Installed directly from nf-core/modules
// //
include { FASTQC } from '../modules/nf-core/fastqc/main' include { FASTQC } from '../modules/nf-core/fastqc/main'
include { FALCO } from '../modules/nf-core/falco/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
@ -120,12 +121,13 @@ workflow TAXPROFILER {
*/ */
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ) ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
FASTQC ( if ( params.preprocessing_qc_tool == 'falco' ) {
ch_input_for_fastqc FALCO ( ch_input_for_fastqc )
) ch_versions = ch_versions.mix(FALCO.out.versions.first())
} else {
ch_versions = ch_versions.mix(FASTQC.out.versions.first()) FASTQC ( ch_input_for_fastqc )
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
}
/* /*
SUBWORKFLOW: PERFORM PREPROCESSING SUBWORKFLOW: PERFORM PREPROCESSING
*/ */
@ -254,7 +256,13 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
if ( params.preprocessing_qc_tool == 'falco' ) {
ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
} else {
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
}
if (params.perform_shortread_qc) { if (params.perform_shortread_qc) {
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )