mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-13 06:43:09 +00:00
Merge pull request #148 from genomic-medicine-sweden/add_falco
Add Falco as an alternative to FastQC
This commit is contained in:
commit
63c260bfbc
13 changed files with 192 additions and 16 deletions
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
|
@ -23,6 +23,7 @@ jobs:
|
|||
- "21.10.3"
|
||||
- "latest-everything"
|
||||
parameters:
|
||||
- "--preprocessing_qc_tool falco"
|
||||
- "--perform_longread_qc false"
|
||||
- "--perform_shortread_qc false"
|
||||
- "--shortread_qc_tool fastp"
|
||||
|
|
|
@ -62,6 +62,10 @@
|
|||
|
||||
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||
|
||||
- [falco](https://doi.org/10.12688/f1000research.21142.2)
|
||||
|
||||
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
- [Anaconda](https://anaconda.com)
|
||||
|
|
|
@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
|
|||
|
||||
![](docs/images/taxprofiler_tube.png)
|
||||
|
||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
|
||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
|
||||
2. Performs optional read pre-processing
|
||||
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
|
||||
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
|
||||
|
|
|
@ -40,6 +40,24 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: FALCO {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/falco/raw" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{html,txt}'
|
||||
]
|
||||
}
|
||||
|
||||
withName: FALCO_PROCESSED {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/falco/processed" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{html,txt}'
|
||||
]
|
||||
}
|
||||
|
||||
withName: FASTP_SINGLE {
|
||||
ext.args = [
|
||||
// trimming options
|
||||
|
|
|
@ -166,6 +166,10 @@ work # Directory containing the nextflow working files
|
|||
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
|
||||
```
|
||||
|
||||
### Sequencing quality control
|
||||
|
||||
nf-core/taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
|
||||
|
||||
### Preprocessing Steps
|
||||
|
||||
nf-core/taxprofiler offers four main preprocessing steps
|
||||
|
@ -179,7 +183,7 @@ nf-core/taxprofiler offers four main preprocessing steps
|
|||
|
||||
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
|
||||
|
||||
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
||||
It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
||||
|
||||
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
|
||||
|
||||
|
|
|
@ -49,6 +49,10 @@
|
|||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
},
|
||||
"falco": {
|
||||
"branch": "master",
|
||||
"git_sha": "fc959214036403ad83efe7a41d43d0606c445cda"
|
||||
},
|
||||
"fastp": {
|
||||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
|
|
57
modules/nf-core/falco/main.nf
generated
Normal file
57
modules/nf-core/falco/main.nf
generated
Normal file
|
@ -0,0 +1,57 @@
|
|||
// Runs falco on the read file(s) of one sample and emits the resulting HTML
// report(s), text output(s) and a versions.yml recording the falco version.
//
// Input:  tuple of sample meta map and one or more read files.
// Output: html     - every *.html file produced in the task directory
//         txt      - every *.txt file produced in the task directory
//         versions - versions.yml
process FALCO {
    tag "$meta.id"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::falco=1.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3':
        'quay.io/biocontainers/falco:1.2.1--h867801b_3' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.html"), emit: html
    tuple val(meta), path("*.txt") , emit: txt
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    if ( reads.toList().size() == 1 ) {
        // Single input file: name the data, summary and report outputs after the prefix.
        """
        falco $args --threads $task.cpus ${reads} -D ${prefix}_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            falco:\$( falco --version | sed -e "s/falco//g" )
        END_VERSIONS
        """
    } else {
        // Several input files: no explicit output names are passed, so the
        // prefix is not applied and falco chooses the output file names itself.
        """
        falco $args --threads $task.cpus ${reads}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            falco:\$( falco --version | sed -e "s/falco//g" )
        END_VERSIONS
        """
    }

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Placeholder outputs use the same file names as the single-file script
    // branch, and the version is extracted with the same sed expression as the
    // real script so stub and real runs record the version identically.
    """
    touch ${prefix}_data.txt
    touch ${prefix}_report.html
    touch ${prefix}_summary.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        falco:\$( falco --version | sed -e "s/falco//g" )
    END_VERSIONS
    """
}
|
52
modules/nf-core/falco/meta.yml
generated
Normal file
52
modules/nf-core/falco/meta.yml
generated
Normal file
|
@ -0,0 +1,52 @@
|
|||
name: falco
|
||||
description: Run falco on sequenced reads
|
||||
keywords:
|
||||
- quality control
|
||||
- qc
|
||||
- adapters
|
||||
- fastq
|
||||
tools:
|
||||
- fastqc:
|
||||
description: "falco is a drop-in C++ implementation of FastQC to assess the quality of sequence reads."
|
||||
|
||||
homepage: "https://falco.readthedocs.io/"
|
||||
documentation: "https://falco.readthedocs.io/"
|
||||
tool_dev_url: "None"
|
||||
doi: ""
|
||||
licence: "['GPL v3']"
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- html:
|
||||
type: file
|
||||
description: FastQC like report
|
||||
pattern: "*_{fastqc_report.html}"
|
||||
- txt:
|
||||
type: file
|
||||
description: falco report data
|
||||
pattern: "*_{data.txt}"
|
||||
- txt:
|
||||
type: file
|
||||
description: falco summary file
|
||||
pattern: "*_{summary.txt}"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
authors:
|
||||
- "@lucacozzuto"
|
|
@ -59,6 +59,8 @@ params {
|
|||
// Databases
|
||||
databases = null
|
||||
|
||||
preprocessing_qc_tool = 'fastqc'
|
||||
|
||||
// FASTQ preprocessing
|
||||
perform_shortread_qc = false
|
||||
shortread_qc_tool = 'fastp'
|
||||
|
|
|
@ -707,5 +707,14 @@
|
|||
{
|
||||
"$ref": "#/definitions/reference_genome_options"
|
||||
}
|
||||
]
|
||||
],
|
||||
"properties": {
|
||||
"preprocessing_qc_tool": {
|
||||
"type": "string",
|
||||
"default": "fastqc",
|
||||
"enum": ["fastqc", "falco"],
|
||||
"help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints); however, it is also applicable to short reads.",
|
||||
"description": "Specify the tool used for quality control of raw sequencing reads"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
//
|
||||
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
|
||||
|
||||
include { PORECHOP } from '../../modules/nf-core/porechop/main'
|
||||
include { FILTLONG } from '../../modules/nf-core/filtlong/main'
|
||||
|
||||
|
@ -52,8 +54,16 @@ workflow LONGREAD_PREPROCESSING {
|
|||
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
||||
}
|
||||
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
if (params.preprocessing_qc_tool == 'fastqc') {
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
|
||||
} else if (params.preprocessing_qc_tool == 'falco') {
|
||||
FALCO_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||
}
|
||||
|
||||
emit:
|
||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
include { SHORTREAD_FASTP } from './shortread_fastp'
|
||||
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
|
||||
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
|
||||
|
||||
workflow SHORTREAD_PREPROCESSING {
|
||||
take:
|
||||
|
@ -27,9 +28,15 @@ workflow SHORTREAD_PREPROCESSING {
|
|||
ch_processed_reads = reads
|
||||
}
|
||||
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
if (params.preprocessing_qc_tool == 'fastqc') {
|
||||
FASTQC_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
} else if (params.preprocessing_qc_tool == 'falco') {
|
||||
FALCO_PROCESSED ( ch_processed_reads )
|
||||
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
|
||||
}
|
||||
|
||||
emit:
|
||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||
|
|
|
@ -84,6 +84,7 @@ include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardis
|
|||
// MODULE: Installed directly from nf-core/modules
|
||||
//
|
||||
include { FASTQC } from '../modules/nf-core/fastqc/main'
|
||||
include { FALCO } from '../modules/nf-core/falco/main'
|
||||
include { MULTIQC } from '../modules/nf-core/multiqc/main'
|
||||
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
|
||||
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
|
||||
|
@ -120,12 +121,13 @@ workflow TAXPROFILER {
|
|||
*/
|
||||
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
||||
|
||||
FASTQC (
|
||||
ch_input_for_fastqc
|
||||
)
|
||||
|
||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||
|
||||
if ( params.preprocessing_qc_tool == 'falco' ) {
|
||||
FALCO ( ch_input_for_fastqc )
|
||||
ch_versions = ch_versions.mix(FALCO.out.versions.first())
|
||||
} else {
|
||||
FASTQC ( ch_input_for_fastqc )
|
||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||
}
|
||||
/*
|
||||
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||
*/
|
||||
|
@ -254,7 +256,13 @@ workflow TAXPROFILER {
|
|||
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
|
||||
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
|
||||
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||
|
||||
if ( params.preprocessing_qc_tool == 'falco' ) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
|
||||
} else {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||
}
|
||||
|
||||
|
||||
if (params.perform_shortread_qc) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||
|
|
Loading…
Reference in a new issue