mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-26 02:49:54 +00:00
Merge pull request #148 from genomic-medicine-sweden/add_falco
Add Falco as an alternative to FastQC
This commit is contained in:
commit
63c260bfbc
13 changed files with 192 additions and 16 deletions
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
|
@ -23,6 +23,7 @@ jobs:
|
||||||
- "21.10.3"
|
- "21.10.3"
|
||||||
- "latest-everything"
|
- "latest-everything"
|
||||||
parameters:
|
parameters:
|
||||||
|
- "--preprocessing_qc_tool falco"
|
||||||
- "--perform_longread_qc false"
|
- "--perform_longread_qc false"
|
||||||
- "--perform_shortread_qc false"
|
- "--perform_shortread_qc false"
|
||||||
- "--shortread_qc_tool fastp"
|
- "--shortread_qc_tool fastp"
|
||||||
|
|
|
@ -62,6 +62,10 @@
|
||||||
|
|
||||||
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||||
|
|
||||||
|
- [falco](https://doi.org/10.12688/f1000research.21142.2)
|
||||||
|
|
||||||
|
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
|
||||||
|
|
||||||
## Software packaging/containerisation tools
|
## Software packaging/containerisation tools
|
||||||
|
|
||||||
- [Anaconda](https://anaconda.com)
|
- [Anaconda](https://anaconda.com)
|
||||||
|
|
|
@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
|
||||||
|
|
||||||
![](docs/images/taxprofiler_tube.png)
|
![](docs/images/taxprofiler_tube.png)
|
||||||
|
|
||||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
|
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
|
||||||
2. Performs optional read pre-processing
|
2. Performs optional read pre-processing
|
||||||
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
|
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
|
||||||
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
|
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
|
||||||
|
|
|
@ -40,6 +40,24 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
withName: FALCO {
|
||||||
|
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/falco/raw" },
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
pattern: '*.{html,txt}'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: FALCO_PROCESSED {
|
||||||
|
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/falco/processed" },
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
pattern: '*.{html,txt}'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
withName: FASTP_SINGLE {
|
withName: FASTP_SINGLE {
|
||||||
ext.args = [
|
ext.args = [
|
||||||
// trimming options
|
// trimming options
|
||||||
|
|
|
@ -166,6 +166,10 @@ work # Directory containing the nextflow working files
|
||||||
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
|
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Sequencing quality control
|
||||||
|
|
||||||
|
nf-core/taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
|
||||||
|
|
||||||
### Preprocessing Steps
|
### Preprocessing Steps
|
||||||
|
|
||||||
nf-core/taxprofiler offers four main preprocessing steps
|
nf-core/taxprofiler offers four main preprocessing steps
|
||||||
|
@ -179,7 +183,7 @@ nf-core/taxprofiler offers four main preprocessing steps
|
||||||
|
|
||||||
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
|
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
|
||||||
|
|
||||||
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
||||||
|
|
||||||
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
|
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,10 @@
|
||||||
"branch": "master",
|
"branch": "master",
|
||||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||||
},
|
},
|
||||||
|
"falco": {
|
||||||
|
"branch": "master",
|
||||||
|
"git_sha": "fc959214036403ad83efe7a41d43d0606c445cda"
|
||||||
|
},
|
||||||
"fastp": {
|
"fastp": {
|
||||||
"branch": "master",
|
"branch": "master",
|
||||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||||
|
|
57
modules/nf-core/falco/main.nf
generated
Normal file
57
modules/nf-core/falco/main.nf
generated
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
process FALCO {
|
||||||
|
tag "$meta.id"
|
||||||
|
label 'process_single'
|
||||||
|
|
||||||
|
|
||||||
|
conda (params.enable_conda ? "bioconda::falco=1.2.1" : null)
|
||||||
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3':
|
||||||
|
'quay.io/biocontainers/falco:1.2.1--h867801b_3' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
tuple val(meta), path(reads)
|
||||||
|
|
||||||
|
output:
|
||||||
|
tuple val(meta), path("*.html"), emit: html
|
||||||
|
tuple val(meta), path("*.txt") , emit: txt
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
def args = task.ext.args ?: ''
|
||||||
|
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
if ( reads.toList().size() == 1 ) {
|
||||||
|
"""
|
||||||
|
falco $args --threads $task.cpus ${reads} -D ${prefix}_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
falco:\$( falco --version | sed -e "s/falco//g" )
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
} else {
|
||||||
|
"""
|
||||||
|
falco $args --threads $task.cpus ${reads}
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
falco:\$( falco --version | sed -e "s/falco//g" )
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
||||||
|
|
||||||
|
stub:
|
||||||
|
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
"""
|
||||||
|
touch ${prefix}_data.txt
|
||||||
|
touch ${prefix}_fastqc_data.html
|
||||||
|
touch ${prefix}_summary.txt
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
falco: \$( falco --version | sed -e "s/falco v//g" )
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
52
modules/nf-core/falco/meta.yml
generated
Normal file
52
modules/nf-core/falco/meta.yml
generated
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
name: falco
|
||||||
|
description: Run falco on sequenced reads
|
||||||
|
keywords:
|
||||||
|
- quality control
|
||||||
|
- qc
|
||||||
|
- adapters
|
||||||
|
- fastq
|
||||||
|
tools:
|
||||||
|
- falco:
|
||||||
|
description: "falco is a drop-in C++ implementation of FastQC to assess the quality of sequence reads."
|
||||||
|
|
||||||
|
homepage: "https://falco.readthedocs.io/"
|
||||||
|
documentation: "https://falco.readthedocs.io/"
|
||||||
|
tool_dev_url: "None"
|
||||||
|
doi: ""
|
||||||
|
licence: "['GPL v3']"
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- html:
|
||||||
|
type: file
|
||||||
|
description: FastQC-like report
|
||||||
|
pattern: "*_{fastqc_report.html}"
|
||||||
|
- txt:
|
||||||
|
type: file
|
||||||
|
description: falco report data
|
||||||
|
pattern: "*_{data.txt}"
|
||||||
|
- txt:
|
||||||
|
type: file
|
||||||
|
description: falco summary file
|
||||||
|
pattern: "*_{summary.txt}"
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
authors:
|
||||||
|
- "@lucacozzuto"
|
|
@ -59,6 +59,8 @@ params {
|
||||||
// Databases
|
// Databases
|
||||||
databases = null
|
databases = null
|
||||||
|
|
||||||
|
preprocessing_qc_tool = 'fastqc'
|
||||||
|
|
||||||
// FASTQ preprocessing
|
// FASTQ preprocessing
|
||||||
perform_shortread_qc = false
|
perform_shortread_qc = false
|
||||||
shortread_qc_tool = 'fastp'
|
shortread_qc_tool = 'fastp'
|
||||||
|
|
|
@ -707,5 +707,14 @@
|
||||||
{
|
{
|
||||||
"$ref": "#/definitions/reference_genome_options"
|
"$ref": "#/definitions/reference_genome_options"
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
|
"properties": {
|
||||||
|
"preprocessing_qc_tool": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "fastqc",
|
||||||
|
"enum": ["fastqc", "falco"],
|
||||||
|
"help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.",
|
||||||
|
"description": "Specify the tool used for quality control of raw sequencing reads"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
//
|
//
|
||||||
|
|
||||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||||
|
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
|
||||||
|
|
||||||
include { PORECHOP } from '../../modules/nf-core/porechop/main'
|
include { PORECHOP } from '../../modules/nf-core/porechop/main'
|
||||||
include { FILTLONG } from '../../modules/nf-core/filtlong/main'
|
include { FILTLONG } from '../../modules/nf-core/filtlong/main'
|
||||||
|
|
||||||
|
@ -52,9 +54,17 @@ workflow LONGREAD_PREPROCESSING {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (params.preprocessing_qc_tool == 'fastqc') {
|
||||||
FASTQC_PROCESSED ( ch_processed_reads )
|
FASTQC_PROCESSED ( ch_processed_reads )
|
||||||
|
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||||
|
|
||||||
|
} else if (params.preprocessing_qc_tool == 'falco') {
|
||||||
|
FALCO_PROCESSED ( ch_processed_reads )
|
||||||
|
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||||
|
}
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||||
versions = ch_versions // channel: [ versions.yml ]
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
include { SHORTREAD_FASTP } from './shortread_fastp'
|
include { SHORTREAD_FASTP } from './shortread_fastp'
|
||||||
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
|
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
|
||||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||||
|
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
|
||||||
|
|
||||||
workflow SHORTREAD_PREPROCESSING {
|
workflow SHORTREAD_PREPROCESSING {
|
||||||
take:
|
take:
|
||||||
|
@ -27,9 +28,15 @@ workflow SHORTREAD_PREPROCESSING {
|
||||||
ch_processed_reads = reads
|
ch_processed_reads = reads
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (params.preprocessing_qc_tool == 'fastqc') {
|
||||||
FASTQC_PROCESSED ( ch_processed_reads )
|
FASTQC_PROCESSED ( ch_processed_reads )
|
||||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||||
|
} else if (params.preprocessing_qc_tool == 'falco') {
|
||||||
|
FALCO_PROCESSED ( ch_processed_reads )
|
||||||
|
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||||
|
}
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||||
|
|
|
@ -84,6 +84,7 @@ include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardis
|
||||||
// MODULE: Installed directly from nf-core/modules
|
// MODULE: Installed directly from nf-core/modules
|
||||||
//
|
//
|
||||||
include { FASTQC } from '../modules/nf-core/fastqc/main'
|
include { FASTQC } from '../modules/nf-core/fastqc/main'
|
||||||
|
include { FALCO } from '../modules/nf-core/falco/main'
|
||||||
include { MULTIQC } from '../modules/nf-core/multiqc/main'
|
include { MULTIQC } from '../modules/nf-core/multiqc/main'
|
||||||
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
|
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
|
||||||
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
|
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
|
||||||
|
@ -120,12 +121,13 @@ workflow TAXPROFILER {
|
||||||
*/
|
*/
|
||||||
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
||||||
|
|
||||||
FASTQC (
|
if ( params.preprocessing_qc_tool == 'falco' ) {
|
||||||
ch_input_for_fastqc
|
FALCO ( ch_input_for_fastqc )
|
||||||
)
|
ch_versions = ch_versions.mix(FALCO.out.versions.first())
|
||||||
|
} else {
|
||||||
|
FASTQC ( ch_input_for_fastqc )
|
||||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
SUBWORKFLOW: PERFORM PREPROCESSING
|
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||||
*/
|
*/
|
||||||
|
@ -254,7 +256,13 @@ workflow TAXPROFILER {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
|
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
|
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
|
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
|
||||||
|
|
||||||
|
if ( params.preprocessing_qc_tool == 'falco' ) {
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
|
||||||
|
} else {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (params.perform_shortread_qc) {
|
if (params.perform_shortread_qc) {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||||
|
|
Loading…
Reference in a new issue