From 16be676d72f9964038a52fb0ddc58c9bdafd4f9b Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 14:34:10 +0100 Subject: [PATCH 1/9] Add Porechop module --- conf/modules.config | 9 ++++ modules.json | 3 ++ modules/nf-core/modules/porechop/main.nf | 35 ++++++++++++++++ modules/nf-core/modules/porechop/meta.yml | 50 +++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 modules/nf-core/modules/porechop/main.nf create mode 100644 modules/nf-core/modules/porechop/meta.yml diff --git a/conf/modules.config b/conf/modules.config index 9e334bc..050772e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -50,6 +50,15 @@ process { ] } + withName: PORECHOP { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/porechop" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + withName: FASTQC_POST { ext.args = '--quiet' ext.prefix = { "${meta.id}_${meta.run_accession}_processed" } diff --git a/modules.json b/modules.json index 6a785b8..284cf13 100644 --- a/modules.json +++ b/modules.json @@ -23,6 +23,9 @@ }, "multiqc": { "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41" + }, + "porechop": { + "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" } } } diff --git a/modules/nf-core/modules/porechop/main.nf b/modules/nf-core/modules/porechop/main.nf new file mode 100644 index 0000000..65982b8 --- /dev/null +++ b/modules/nf-core/modules/porechop/main.nf @@ -0,0 +1,35 @@ +process PORECHOP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::porechop=0.2.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' : + 'quay.io/biocontainers/porechop:0.2.4--py39h7cff6ad_2' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + porechop \\ + -i $reads \\ + -t $task.cpus \\ + $args \\ + -o ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop: \$( porechop --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/porechop/meta.yml b/modules/nf-core/modules/porechop/meta.yml new file mode 100644 index 0000000..81399d2 --- /dev/null +++ b/modules/nf-core/modules/porechop/meta.yml @@ -0,0 +1,50 @@ +name: porechop +description: Adapter removal and demultiplexing of Oxford Nanopore reads +keywords: + - adapter + - nanopore + - demultiplexing +tools: + - porechop: + description: Adapter removal and demultiplexing of Oxford Nanopore reads + homepage: "https://github.com/rrwick/Porechop" + documentation: "https://github.com/rrwick/Porechop" + tool_dev_url: "https://github.com/rrwick/Porechop" + doi: "10.1099/mgen.0.000132" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Demultiplexed and/or adapter-trimmed fastq.gz file + pattern: "*.{fastq.gz}" + +authors: + - "@ggabernet" + - "@jasmezz" + - "@d4straub" + - "@LaurenceKuhl" + - "@SusiJo" + - "@jonasscheid" + - "@jonoave" + - "@GokceOGUZ" From 1e42f1d9f295e909cc75d10b6306cce2d1f4bf22 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:10:44 +0100 Subject: [PATCH 2/9] Add long read preprocessing subworkflow --- subworkflows/local/longread_preprocessing.nf | 34 ++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 subworkflows/local/longread_preprocessing.nf diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf new file mode 100644 index 0000000..da1049a --- /dev/null +++ b/subworkflows/local/longread_preprocessing.nf @@ -0,0 +1,34 @@ + +include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main' +include { PORECHOP } from '../../modules/nf-core/modules/porechop/main' + +workflow LONGREAD_PREPROCESSING { + take: + reads + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + PORECHOP ( reads ) + + ch_processed_reads = PORECHOP.out.reads + .dump(tag: "pre_fastqc_check") + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = 1 + [ meta_new, reads ] + } + + FASTQC_POST ( PORECHOP.out.reads ) + ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} ) + + + emit: + reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} + From 582aaa105fff0328f8df314d88ab84fe6c8e528b Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:12:07 +0100 Subject: [PATCH 3/9] Include long reads preprocessing subworkflow --- workflows/taxprofiler.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f48cff6..9e52b59 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -40,7 +40,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' include { FASTQ_PREPROCESSING } from '../subworkflows/local/preprocessing' - +include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -104,6 +104,10 @@ workflow TAXPROFILER { FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq ) } + LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ) + + ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) + // // PERFORM RUN MERGING // @@ -191,6 +195,7 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) if (params.fastp_clip_merge) { ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc) } From d09a3c170edac3afb7e6932cbcca824d6b77e202 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:46:03 +0100 Subject: [PATCH 4/9] Add Porechop --- CITATIONS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index 192b2f4..53c53c3 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -15,6 +15,8 @@ * [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +* [Porechop](https://github.com/rrwick/Porechop) + ## Software packaging/containerisation tools * [Anaconda](https://anaconda.com) From 24a01529f5053c51a0f548ad04790f9bd5e3df9d Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:47:46 +0100 Subject: [PATCH 5/9] Add mentioning about Nanopore reads pre-processing --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5d0c74b..622b8de 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - Low complexity filtering - Host read removal - Run merging + - Adapter and quality trimming of Nanopore reads 3. Performs taxonomic profiling a choice of: - Kraken2 - MetaPhlAn3 From c97de32434d03fdc97c0a5dc9c75cf328b027193 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:17:08 +0100 Subject: [PATCH 6/9] Make adapter and quality trimming optional --- nextflow.config | 1 + workflows/taxprofiler.nf | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index 7b897ab..4a3a56d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -57,6 +57,7 @@ params { // FASTQ preprocessing fastp_clip_merge = false fastp_exclude_unmerged = true + remove_adapters = false // MALT run_malt = false diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 9e52b59..0e144f3 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -104,9 +104,16 @@ workflow TAXPROFILER { FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq ) } - LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ) + ch_multiqc_files = Channel.empty() + if ( params.remove_adapters ) { + ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads + .map { it -> [ it[0], [it[1]] ] } ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) + } else { + ch_longreads_preprocessed = INPUT_CHECK.out.nanopore + } // // PERFORM RUN MERGING @@ -138,6 +145,7 @@ workflow TAXPROFILER { // output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = ch_reads_for_profiling + .mix( ch_longreads_preprocessed ) .combine(DB_CHECK.out.dbs) .dump(tag: "reads_plus_db") .branch { @@ -189,13 +197,11 @@ workflow TAXPROFILER { workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) if (params.fastp_clip_merge) { ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc) } From f6fe26de466446379f49ba00dd80eb995bafd0bb Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:25:56 +0100 Subject: [PATCH 7/9] Rename shortread subworkflow to be more consistent --- .../{preprocessing.nf => shortread_preprocessing.nf} | 0 workflows/taxprofiler.nf | 8 ++++---- 2 files changed, 4 insertions(+), 4 deletions(-) rename subworkflows/local/{preprocessing.nf => shortread_preprocessing.nf} (100%) diff --git a/subworkflows/local/preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf similarity index 100% rename from subworkflows/local/preprocessing.nf rename to subworkflows/local/shortread_preprocessing.nf diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 0e144f3..22c7518 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -39,7 +39,7 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' -include { FASTQ_PREPROCESSING } from '../subworkflows/local/preprocessing' +include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' /* @@ -101,7 +101,7 @@ workflow TAXPROFILER { // PERFORM PREPROCESSING // if ( params.fastp_clip_merge ) { - FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq ) + SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ) } ch_multiqc_files = Channel.empty() @@ -118,7 +118,7 @@ workflow TAXPROFILER { // // PERFORM RUN MERGING // - ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads + ch_processed_for_combine = SHORTREAD_PREPROCESSING.out.reads .dump(tag: "prep_for_combine_grouping") .map { meta, reads -> @@ -203,7 +203,7 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) if (params.fastp_clip_merge) { - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc) + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc) } if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])) From 4940ec57ffee76378b9bfe8dcff1d73f3097846e Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:26:26 +0100 Subject: [PATCH 8/9] Remove unnecessary extra point --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 622b8de..d454a9b 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,10 @@ On release, automated continuous integration tests run the pipeline on a full-si 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Performs optional read pre-processing - - Adapter clipping and merging + - Adapter clipping and merging (short, and nanopore reads) - Low complexity filtering - Host read removal - Run merging - - Adapter and quality trimming of Nanopore reads 3. Performs taxonomic profiling a choice of: - Kraken2 - MetaPhlAn3 From 5b1b48e59e17d271d03450daca083dd8cec502af Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:29:47 +0100 Subject: [PATCH 9/9] Update subworkflow name to be more consistent --- subworkflows/local/shortread_preprocessing.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 5832824..406c198 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -7,7 +7,7 @@ include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fast include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main' include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main' -workflow FASTQ_PREPROCESSING { +workflow SHORTREAD_PREPROCESSING { take: reads // file: /path/to/samplesheet.csv