From 0f0ed6cd4698df3bea9d4168c056085a820eb056 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 10:45:06 +0100 Subject: [PATCH 01/16] Fix function name --- subworkflows/local/input_check.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index d66fb3a..481028f 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -15,6 +15,7 @@ workflow INPUT_CHECK { .dump(tag: "input_split_csv_out") .branch { fasta: it['fasta'] != '' + nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE' fastq: true } From 41b3d8db822caab916ec82fe4b4f581f17ab1ca5 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 10:47:41 +0100 Subject: [PATCH 02/16] Add nanopore channel --- subworkflows/local/input_check.nf | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 481028f..2e30bcc 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -20,10 +20,15 @@ workflow INPUT_CHECK { } parsed_samplesheet.fastq - .map { create_fastq_channels(it) } + .map { create_fastq_channel(it) } .dump(tag: "fastq_channel_init") .set { fastq } + parsed_samplesheet.nanopore + .map { create_fastq_channel(it) } + .dump(tag: "fastq_nanopore_channel_init") + .set { nanopore } + parsed_samplesheet.fasta .map { create_fasta_channels(it) } .dump(tag: "fasta_channel_init") @@ -31,6 +36,7 @@ workflow INPUT_CHECK { emit: fastq // channel: [ val(meta), [ reads ] ] + nanopore // channel: [ val(meta), [ reads ] ] fasta // channel: [ val(meta), fasta ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } @@ -52,10 +58,17 @@ def create_fastq_channel(LinkedHashMap row) { if (meta.single_end) { fastq_meta = [ meta, [ file(row.fastq_1) ] ] } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + if (meta.instrument_platform == 'OXFORD_NANOPORE') { + if (row.fastq_2 != '') { + exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}" + } + fastq_meta = [ meta, [ file(row.fastq_1) ] ] + } else { + if (!file(row.fastq_2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + } + fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] } return fastq_meta } From 7f7ddc9f14237f1616918963a002cbc64fea2687 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 10:48:06 +0100 Subject: [PATCH 03/16] Update comment --- bin/check_samplesheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 16e668b..d10ee90 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -173,7 +173,7 @@ def check_samplesheet(file_in, file_out): ## Auto-detect paired-end/single-end if sample and fastq_1 and fastq_2: ## Paired-end short reads sample_info.extend(["0", fastq_1, fastq_2, fasta]) - elif sample and fastq_1 and not fastq_2: ## Single-end short reads + elif sample and fastq_1 and not fastq_2: ## Single-end short/long fastq reads sample_info.extend(["1", fastq_1, fastq_2, fasta]) elif ( sample and fasta and not fastq_1 and not fastq_2 From 2e1b6c5d0a3b7c455bba5cb4d20dbbceac43dffe Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 11:00:36 +0100 Subject: [PATCH 04/16] Add info on Nanopore reads to fastq_1 column --- docs/usage.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index a8b0448..38c063e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -44,11 +44,11 @@ TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, ``` -| Column | Description | -|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1 or Nanopore reads. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. From c8e49c56f4f6b26dde209acfd474fc5c4f43caf7 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 13:47:44 +0100 Subject: [PATCH 05/16] Perform fastqc on nanopore reads before trimming --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f740324..c3d3eb6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -89,7 +89,7 @@ workflow TAXPROFILER { // MODULE: Run FastQC // FASTQC ( - INPUT_CHECK.out.fastq + INPUT_CHECK.out.fastq.concat( INPUT_CHECK.out.nanopore ) ) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) From 0936b9b28e52986576d9bb62473373f209565d6d Mon Sep 17 00:00:00 2001 From: Lauri Mesilaakso Date: Fri, 18 Mar 2022 14:18:38 +0100 Subject: [PATCH 06/16] Update workflows/taxprofiler.nf Co-authored-by: James A. Fellows Yates --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index c3d3eb6..f48cff6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -89,7 +89,7 @@ workflow TAXPROFILER { // MODULE: Run FastQC // FASTQC ( - INPUT_CHECK.out.fastq.concat( INPUT_CHECK.out.nanopore ) + INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ) ) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) From 16be676d72f9964038a52fb0ddc58c9bdafd4f9b Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 14:34:10 +0100 Subject: [PATCH 07/16] Add Porechop module --- conf/modules.config | 9 ++++ modules.json | 3 ++ modules/nf-core/modules/porechop/main.nf | 35 ++++++++++++++++ modules/nf-core/modules/porechop/meta.yml | 50 +++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 modules/nf-core/modules/porechop/main.nf create mode 100644 modules/nf-core/modules/porechop/meta.yml diff --git a/conf/modules.config b/conf/modules.config index 9e334bc..050772e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -50,6 +50,15 @@ process { ] } + withName: PORECHOP { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/porechop" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + withName: FASTQC_POST { ext.args = '--quiet' ext.prefix = { "${meta.id}_${meta.run_accession}_processed" } diff --git a/modules.json b/modules.json index 6a785b8..284cf13 100644 --- a/modules.json +++ b/modules.json @@ -23,6 +23,9 @@ }, "multiqc": { "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41" + }, + "porechop": { + "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" } } } diff --git a/modules/nf-core/modules/porechop/main.nf b/modules/nf-core/modules/porechop/main.nf new file mode 100644 index 0000000..65982b8 --- /dev/null +++ b/modules/nf-core/modules/porechop/main.nf @@ -0,0 +1,35 @@ +process PORECHOP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::porechop=0.2.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' : + 'quay.io/biocontainers/porechop:0.2.4--py39h7cff6ad_2' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + porechop \\ + -i $reads \\ + -t $task.cpus \\ + $args \\ + -o ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop: \$( porechop --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/porechop/meta.yml b/modules/nf-core/modules/porechop/meta.yml new file mode 100644 index 0000000..81399d2 --- /dev/null +++ b/modules/nf-core/modules/porechop/meta.yml @@ -0,0 +1,50 @@ +name: porechop +description: Adapter removal and demultiplexing of Oxford Nanopore reads +keywords: + - adapter + - nanopore + - demultiplexing +tools: + - porechop: + description: Adapter removal and demultiplexing of Oxford Nanopore reads + homepage: "https://github.com/rrwick/Porechop" + documentation: "https://github.com/rrwick/Porechop" + tool_dev_url: "https://github.com/rrwick/Porechop" + doi: "10.1099/mgen.0.000132" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Demultiplexed and/or adapter-trimmed fastq.gz file + pattern: "*.{fastq.gz}" + +authors: + - "@ggabernet" + - "@jasmezz" + - "@d4straub" + - "@LaurenceKuhl" + - "@SusiJo" + - "@jonasscheid" + - "@jonoave" + - "@GokceOGUZ" From 1e42f1d9f295e909cc75d10b6306cce2d1f4bf22 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:10:44 +0100 Subject: [PATCH 08/16] Add long read preprocessing subworkflow --- subworkflows/local/longread_preprocessing.nf | 34 ++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 subworkflows/local/longread_preprocessing.nf diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf new file mode 100644 index 0000000..da1049a --- /dev/null +++ b/subworkflows/local/longread_preprocessing.nf @@ -0,0 +1,34 @@ + +include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main' +include { PORECHOP } from '../../modules/nf-core/modules/porechop/main' + +workflow LONGREAD_PREPROCESSING { + take: + reads + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + PORECHOP ( reads ) + + ch_processed_reads = PORECHOP.out.reads + .dump(tag: "pre_fastqc_check") + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = 1 + [ meta_new, reads ] + } + + FASTQC_POST ( PORECHOP.out.reads ) + ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} ) + + + emit: + reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} + From 582aaa105fff0328f8df314d88ab84fe6c8e528b Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:12:07 +0100 Subject: [PATCH 09/16] Include long reads preprocessing subworkflow --- workflows/taxprofiler.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f48cff6..9e52b59 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -40,7 +40,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' include { FASTQ_PREPROCESSING } from '../subworkflows/local/preprocessing' - +include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -104,6 +104,10 @@ workflow TAXPROFILER { FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq ) } + LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ) + + ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) + // // PERFORM RUN MERGING // @@ -191,6 +195,7 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) if (params.fastp_clip_merge) { ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc) } From d09a3c170edac3afb7e6932cbcca824d6b77e202 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:46:03 +0100 Subject: [PATCH 10/16] Add Porechop --- CITATIONS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index 192b2f4..53c53c3 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -15,6 +15,8 @@ * [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +* [Porechop](https://github.com/rrwick/Porechop) + ## Software packaging/containerisation tools * [Anaconda](https://anaconda.com) From 24a01529f5053c51a0f548ad04790f9bd5e3df9d Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Fri, 18 Mar 2022 15:47:46 +0100 Subject: [PATCH 11/16] Add mentioning about Nanopore reads pre-processing --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5d0c74b..622b8de 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - Low complexity filtering - Host read removal - Run merging + - Adapter and quality trimming of Nanopore reads 3. Performs taxonomic profiling a choice of: - Kraken2 - MetaPhlAn3 From c97de32434d03fdc97c0a5dc9c75cf328b027193 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:17:08 +0100 Subject: [PATCH 12/16] Make adapter and quality trimming optional --- nextflow.config | 1 + workflows/taxprofiler.nf | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index 7b897ab..4a3a56d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -57,6 +57,7 @@ params { // FASTQ preprocessing fastp_clip_merge = false fastp_exclude_unmerged = true + remove_adapters = false // MALT run_malt = false diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 9e52b59..0e144f3 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -104,9 +104,16 @@ workflow TAXPROFILER { FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq ) } - LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ) + ch_multiqc_files = Channel.empty() + if ( params.remove_adapters ) { + ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads + .map { it -> [ it[0], [it[1]] ] } ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) + } else { + ch_longreads_preprocessed = INPUT_CHECK.out.nanopore + } // // PERFORM RUN MERGING @@ -138,6 +145,7 @@ workflow TAXPROFILER { // output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = ch_reads_for_profiling + .mix( ch_longreads_preprocessed ) .combine(DB_CHECK.out.dbs) .dump(tag: "reads_plus_db") .branch { @@ -189,13 +197,11 @@ workflow TAXPROFILER { workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) if (params.fastp_clip_merge) { ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc) } From f6fe26de466446379f49ba00dd80eb995bafd0bb Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:25:56 +0100 Subject: [PATCH 13/16] Rename shortread subworkflow to be more consistent --- .../{preprocessing.nf => shortread_preprocessing.nf} | 0 workflows/taxprofiler.nf | 8 ++++---- 2 files changed, 4 insertions(+), 4 deletions(-) rename subworkflows/local/{preprocessing.nf => shortread_preprocessing.nf} (100%) diff --git a/subworkflows/local/preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf similarity index 100% rename from subworkflows/local/preprocessing.nf rename to subworkflows/local/shortread_preprocessing.nf diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 0e144f3..22c7518 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -39,7 +39,7 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' -include { FASTQ_PREPROCESSING } from '../subworkflows/local/preprocessing' +include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' /* @@ -101,7 +101,7 @@ workflow TAXPROFILER { // PERFORM PREPROCESSING // if ( params.fastp_clip_merge ) { - FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq ) + SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ) } ch_multiqc_files = Channel.empty() @@ -118,7 +118,7 @@ workflow TAXPROFILER { // // PERFORM RUN MERGING // - ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads + ch_processed_for_combine = SHORTREAD_PREPROCESSING.out.reads .dump(tag: "prep_for_combine_grouping") .map { meta, reads -> @@ -203,7 +203,7 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) if (params.fastp_clip_merge) { - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc) + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc) } if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])) From 4940ec57ffee76378b9bfe8dcff1d73f3097846e Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:26:26 +0100 Subject: [PATCH 14/16] Remove unnecessary extra point --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 622b8de..d454a9b 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,10 @@ On release, automated continuous integration tests run the pipeline on a full-si 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Performs optional read pre-processing - - Adapter clipping and merging + - Adapter clipping and merging (short, and nanopore reads) - Low complexity filtering - Host read removal - Run merging - - Adapter and quality trimming of Nanopore reads 3. Performs taxonomic profiling a choice of: - Kraken2 - MetaPhlAn3 From 5b1b48e59e17d271d03450daca083dd8cec502af Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Mon, 21 Mar 2022 18:29:47 +0100 Subject: [PATCH 15/16] Update subworkflow name to be more consistent --- subworkflows/local/shortread_preprocessing.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 5832824..406c198 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -7,7 +7,7 @@ include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fast include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main' include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main' -workflow FASTQ_PREPROCESSING { +workflow SHORTREAD_PREPROCESSING { take: reads // file: /path/to/samplesheet.csv From 80129985424b214ee22213a8db7cc139e2793ff5 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 21 Mar 2022 19:52:50 +0100 Subject: [PATCH 16/16] Make parameter naming more consistent for clipmerge --- conf/modules.config | 5 ++--- nextflow.config | 6 ++--- subworkflows/local/shortread_preprocessing.nf | 2 +- workflows/taxprofiler.nf | 22 ++++++++++++------- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 050772e..c09a011 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,7 +41,7 @@ process { // TODO also include option to NOT merge ext.args = [ { ${meta.single_end} } == 0 ? "-m" : '', - params.fastp_exclude_unmerged ? '' : "--include_unmerged" + params.shortread_excludeunmerged ? '' : "--include_unmerged" ].join(' ').trim() publishDir = [ path: { "${params.outdir}/fastp" }, @@ -84,7 +84,7 @@ process { pattern: '*.{rma6,tab,text,sam,log}' ] ext.args = { "${meta.db_params}" } - ext.when = params.run_malt + ext.prefix = { "${meta.id}-${meta.db_name}" } } withName: KRAKEN2_KRAKEN2 { @@ -94,7 +94,6 @@ process { pattern: '.{fastq.gz,txt}' ] ext.args = { "${meta.db_params}" } - ext.when = params.run_kraken2 ext.prefix = { "${meta.id}-${meta.db_name}" } } diff --git a/nextflow.config b/nextflow.config index 4a3a56d..5f7aec6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,9 +55,9 @@ params { databases = null // FASTQ preprocessing - fastp_clip_merge = false - fastp_exclude_unmerged = true - remove_adapters = false + shortread_clipmerge = false + shortread_excludeunmerged = true + longread_clip = false // MALT run_malt = false diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 406c198..d996a76 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -23,7 +23,7 @@ workflow SHORTREAD_PREPROCESSING { // TODO move to subworkflow - if ( params.fastp_clip_merge ) { + if ( params.shortread_clipmerge ) { ch_input_for_fastp = reads .dump(tag: "pre-fastp_branch") diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 22c7518..4aa0684 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -100,17 +100,14 @@ workflow TAXPROFILER { // // PERFORM PREPROCESSING // - if ( params.fastp_clip_merge ) { + if ( params.shortread_clipmerge ) { SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ) } - ch_multiqc_files = Channel.empty() - - if ( params.remove_adapters ) { + if ( params.longread_clip ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) } else { ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } @@ -187,9 +184,13 @@ workflow TAXPROFILER { // // RUN PROFILING // - MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) - KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) + if ( params.run_malt ) { + MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) + } + if ( params.run_kraken2 ) { + KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) + } // // MODULE: MultiQC @@ -197,14 +198,19 @@ workflow TAXPROFILER { workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) + ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - if (params.fastp_clip_merge) { + + if (params.shortread_clipmerge) { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc) } + if (params.longread_clip) { + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) + } if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())