1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 15:09:54 +00:00

Merge pull request #27 from genomic-medicine-sweden/add_nanopore

Add nanopore reads preprocessing with Porechop
This commit is contained in:
James A. Fellows Yates 2022-03-21 19:41:58 +01:00 committed by GitHub
commit 51a0acd05b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 153 additions and 8 deletions

View file

@ -15,6 +15,8 @@
* [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) * [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
* [Porechop](https://github.com/rrwick/Porechop)
## Software packaging/containerisation tools ## Software packaging/containerisation tools
* [Anaconda](https://anaconda.com) * [Anaconda](https://anaconda.com)

View file

@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
2. Performs optional read pre-processing 2. Performs optional read pre-processing
- Adapter clipping and merging - Adapter clipping and merging (short and nanopore reads)
- Low complexity filtering - Low complexity filtering
- Host read removal - Host read removal
- Run merging - Run merging

View file

@ -50,6 +50,15 @@ process {
] ]
} }
// Per-process settings applied to the PORECHOP process.
withName: PORECHOP {
// Output file prefix built from the sample id and run accession in the meta map.
ext.prefix = { "${meta.id}_${meta.run_accession}" }
// Copy (not symlink) files matching *.fastq.gz into <params.outdir>/porechop.
publishDir = [
path: { "${params.outdir}/porechop" },
mode: 'copy',
pattern: '*.fastq.gz'
]
}
withName: FASTQC_POST { withName: FASTQC_POST {
ext.args = '--quiet' ext.args = '--quiet'
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" } ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }

View file

@ -23,6 +23,9 @@
}, },
"multiqc": { "multiqc": {
"git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41" "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
},
"porechop": {
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
} }
} }
} }

35
modules/nf-core/modules/porechop/main.nf generated Normal file
View file

@ -0,0 +1,35 @@
// Runs `porechop` on the staged reads and writes `<prefix>.fastq.gz`, plus a
// versions.yml recording the output of `porechop --version`.
process PORECHOP {
// Tag each task with the sample id from the meta map.
tag "$meta.id"
label 'process_medium'
// Conda environment is only requested when params.enable_conda is set.
conda (params.enable_conda ? "bioconda::porechop=0.2.4" : null)
// Use the Galaxy depot Singularity image when the engine is singularity and
// task.ext.singularity_pull_docker_container is not set; otherwise use the
// quay.io biocontainers image.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' :
'quay.io/biocontainers/porechop:0.2.4--py39h7cff6ad_2' }"
input:
// meta: sample metadata map (only meta.id is read here); reads: input read file(s).
tuple val(meta), path(reads)
output:
// reads: the meta map paired with the *.fastq.gz file(s) produced by the task.
tuple val(meta), path("*.fastq.gz"), emit: reads
// versions: versions.yml written at the end of the script.
path "versions.yml" , emit: versions
when:
// Run unless task.ext.when is explicitly set to a falsy value.
task.ext.when == null || task.ext.when
script:
// Extra command-line arguments; empty string when task.ext.args is unset.
def args = task.ext.args ?: ''
// Output file prefix; falls back to the sample id when task.ext.prefix is unset.
def prefix = task.ext.prefix ?: "${meta.id}"
// NOTE(review): if the staged input is already named "${prefix}.fastq.gz", the -o
// target coincides with it — verify that callers set ext.prefix so the names differ.
// NOTE(review): indentation of the heredoc body appears to have been lost in this
// view; confirm against the upstream module that the tool line is nested under
// the process name in versions.yml.
"""
porechop \\
-i $reads \\
-t $task.cpus \\
$args \\
-o ${prefix}.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
porechop: \$( porechop --version )
END_VERSIONS
"""
}

View file

@ -0,0 +1,50 @@
name: porechop
description: Adapter removal and demultiplexing of Oxford Nanopore reads
keywords:
- adapter
- nanopore
- demultiplexing
tools:
- porechop:
description: Adapter removal and demultiplexing of Oxford Nanopore reads
homepage: "https://github.com/rrwick/Porechop"
documentation: "https://github.com/rrwick/Porechop"
tool_dev_url: "https://github.com/rrwick/Porechop"
doi: "10.1099/mgen.0.000132"
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: fastq/fastq.gz file
pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Demultiplexed and/or adapter-trimmed fastq.gz file
pattern: "*.fastq.gz"
authors:
- "@ggabernet"
- "@jasmezz"
- "@d4straub"
- "@LaurenceKuhl"
- "@SusiJo"
- "@jonasscheid"
- "@jonoave"
- "@GokceOGUZ"

View file

@ -57,6 +57,7 @@ params {
// FASTQ preprocessing // FASTQ preprocessing
fastp_clip_merge = false fastp_clip_merge = false
fastp_exclude_unmerged = true fastp_exclude_unmerged = true
remove_adapters = false
// MALT // MALT
run_malt = false run_malt = false

View file

@ -0,0 +1,34 @@
include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
//
// Long-read preprocessing: run PORECHOP on the incoming reads, run FASTQC_POST
// on the PORECHOP output, and emit the processed reads together with the
// collected version and MultiQC input channels.
//
workflow LONGREAD_PREPROCESSING {

    take:
    reads

    main:
    PORECHOP ( reads )

    // Post-processing QC consumes the PORECHOP output channel directly
    // (i.e. with the meta map as emitted by PORECHOP).
    FASTQC_POST ( PORECHOP.out.reads )

    // Emit the processed reads with a copy of the meta map in which
    // single_end is set to 1; the map emitted by PORECHOP is not mutated.
    ch_processed_reads = PORECHOP.out.reads
        .dump(tag: "pre_fastqc_check")
        .map { sample_meta, trimmed_reads ->
            def updated_meta = sample_meta.clone()
            updated_meta['single_end'] = 1
            [ updated_meta, trimmed_reads ]
        }

    // Only the first PORECHOP versions.yml is kept.
    ch_versions = Channel.empty().mix( PORECHOP.out.versions.first() )

    // All FastQC zip outputs (second tuple element) gathered into one emission.
    ch_multiqc_files = Channel.empty().mix( FASTQC_POST.out.zip.collect { entry -> entry[1] } )

    emit:
    reads    = ch_processed_reads   // channel: [ val(meta), reads ]
    versions = ch_versions          // channel: [ versions.yml ]
    mqc      = ch_multiqc_files     // channel: collected FastQC zip files
}

View file

@ -7,7 +7,7 @@ include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fast
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main' include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main' include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
workflow FASTQ_PREPROCESSING { workflow SHORTREAD_PREPROCESSING {
take: take:
reads // file: /path/to/samplesheet.csv reads // file: /path/to/samplesheet.csv

View file

@ -39,8 +39,8 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
include { INPUT_CHECK } from '../subworkflows/local/input_check' include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { DB_CHECK } from '../subworkflows/local/db_check' include { DB_CHECK } from '../subworkflows/local/db_check'
include { FASTQ_PREPROCESSING } from '../subworkflows/local/preprocessing' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
/* /*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -101,13 +101,24 @@ workflow TAXPROFILER {
// PERFORM PREPROCESSING // PERFORM PREPROCESSING
// //
if ( params.fastp_clip_merge ) { if ( params.fastp_clip_merge ) {
FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq ) SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq )
}
ch_multiqc_files = Channel.empty()
if ( params.remove_adapters ) {
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
.map { it -> [ it[0], [it[1]] ] }
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc)
} else {
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
} }
// //
// PERFORM RUN MERGING // PERFORM RUN MERGING
// //
ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads ch_processed_for_combine = SHORTREAD_PREPROCESSING.out.reads
.dump(tag: "prep_for_combine_grouping") .dump(tag: "prep_for_combine_grouping")
.map { .map {
meta, reads -> meta, reads ->
@ -134,6 +145,7 @@ workflow TAXPROFILER {
// output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90] // output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
ch_input_for_profiling = ch_reads_for_profiling ch_input_for_profiling = ch_reads_for_profiling
.mix( ch_longreads_preprocessed )
.combine(DB_CHECK.out.dbs) .combine(DB_CHECK.out.dbs)
.dump(tag: "reads_plus_db") .dump(tag: "reads_plus_db")
.branch { .branch {
@ -185,14 +197,13 @@ workflow TAXPROFILER {
workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params) workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
ch_workflow_summary = Channel.value(workflow_summary) ch_workflow_summary = Channel.value(workflow_summary)
ch_multiqc_files = Channel.empty()
ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config))
ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
if (params.fastp_clip_merge) { if (params.fastp_clip_merge) {
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc) ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc)
} }
if (params.run_kraken2) { if (params.run_kraken2) {
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))