From 482112bb42ddd314de8e92b6a3c1a94e530828f7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 3 Apr 2022 07:58:40 +0200 Subject: [PATCH 01/25] Start work on host removal --- modules.json | 11 ++- modules/nf-core/modules/bowtie2/align/main.nf | 77 +++++++++++++++++++ .../nf-core/modules/bowtie2/align/meta.yml | 51 ++++++++++++ modules/nf-core/modules/bowtie2/build/main.nf | 30 ++++++++ .../nf-core/modules/bowtie2/build/meta.yml | 33 ++++++++ .../nf-core/modules/samtools/flagstat/main.nf | 34 ++++++++ .../modules/samtools/flagstat/meta.yml | 49 ++++++++++++ nextflow.config | 4 + subworkflows/local/shortread_hostremoval.nf | 39 ++++++++++ workflows/taxprofiler.nf | 12 ++- 10 files changed, 336 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/modules/bowtie2/align/main.nf create mode 100644 modules/nf-core/modules/bowtie2/align/meta.yml create mode 100644 modules/nf-core/modules/bowtie2/build/main.nf create mode 100644 modules/nf-core/modules/bowtie2/build/meta.yml create mode 100644 modules/nf-core/modules/samtools/flagstat/main.nf create mode 100644 modules/nf-core/modules/samtools/flagstat/meta.yml create mode 100644 subworkflows/local/shortread_hostremoval.nf diff --git a/modules.json b/modules.json index dcfbd3f..7c3facc 100644 --- a/modules.json +++ b/modules.json @@ -6,6 +6,12 @@ "adapterremoval": { "git_sha": "f0800157544a82ae222931764483331a81812012" }, + "bowtie2/align": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bowtie2/build": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "cat/fastq": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -30,9 +36,12 @@ "porechop": { "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" }, + "samtools/flagstat": { + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" + }, "untar": { "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf new file mode 100644 index 0000000..7e8a965 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -0,0 +1,77 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' : + 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }" + + input: + tuple val(meta), path(reads) + path index + val save_unaligned + + output: + tuple val(meta), path('*.bam') , emit: bam + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def unaligned = save_unaligned ? 
"--un-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -U $reads \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } else { + def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -1 ${reads[0]} \\ + -2 ${reads[1]} \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml new file mode 100644 index 0000000..f80421e --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -0,0 +1,51 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/modules/bowtie2/build/main.nf new file mode 100644 index 0000000..a4da62d --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/main.nf @@ -0,0 +1,30 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : + 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + + input: + path fasta + + output: + path 'bowtie2' , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/modules/bowtie2/build/meta.yml new file mode 100644 index 0000000..2da9a21 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/meta.yml @@ -0,0 +1,33 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf new file mode 100644 index 0000000..9e3440a --- /dev/null +++ b/modules/nf-core/modules/samtools/flagstat/main.nf @@ -0,0 +1,34 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + flagstat \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${bam}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/modules/samtools/flagstat/meta.yml new file mode 100644 index 0000000..9526906 --- /dev/null +++ b/modules/nf-core/modules/samtools/flagstat/meta.yml @@ -0,0 +1,49 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 7be36a6..8aa8722 100644 --- a/nextflow.config +++ b/nextflow.config @@ -65,6 +65,10 @@ params { shortread_clipmerge_minlength = 15 longread_clip = false + // Host Removal + shortread_hostremoval_reference = null + shortread_hostremoval_index = null + // MALT run_malt = false malt_mode = 'BlastN' diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf new file mode 100644 index 0000000..4b0861c --- /dev/null +++ b/subworkflows/local/shortread_hostremoval.nf @@ -0,0 +1,39 @@ +// +// Remove host reads via alignment and export off-target reads +// + +include { BOWTIE2_ALIGN } from '../../../modules/nf-core/modules/bowtie2/align/main' +include { BOWTIE2_BUILD } from '../../../modules/nf-core/modules/bowtie2/build/main' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/modules/samtools/view/main' +include { SAMTOOLS_FASTQ } from '../../../modules/nf-core/modules/samtools/fastq/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/modules/samtools/flagstat/main' + +workflow SHORTREAD_PREPROCESSING { + take: + reads // [ [ meta ], [ reads ] ] + reference // /path/to/fasta + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( !params.shortread_hostremoval_index ) { + file( , checkIfExists: true ) + BOWTIE2_BUILD ( reference ) + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + } + + BOWTIE2_ALIGN ( reads, BOWTIE2_BUILD.out.index ) + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.log ) + + SAMTOOLS_FLAGSTAT ( BOWTIE2_ALIGN.out.bam ) + ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.flagstat ) + + emit: + reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} + diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index ce89a91..c5678f1 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -11,15 +11,21 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.databases, params.multiqc_config ] +def checkPathParamList = [ params.input, params.databases, params.shortread_hostremoval_reference, + params.shortread_hostremoval_index, params.multiqc_config + ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters -if 
(params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } +if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +if (params.databases ) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" +// TODO Add check if index but no reference exit 1 +if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { } +if (params.shortread_hostremoval_index) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES From 066ceb2bcaeb21c0a2a07fd1a5359e30277869ce Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 3 Apr 2022 17:23:14 +0200 Subject: [PATCH 02/25] Remove flagstat as bowtie2 reports this itself --- conf/modules.config | 19 ++++++- conf/test.config | 12 +++-- modules.json | 3 -- .../nf-core/modules/samtools/flagstat/main.nf | 34 ------------- .../modules/samtools/flagstat/meta.yml | 49 ------------------- nextflow.config | 1 + nextflow_schema.json | 14 +++++- subworkflows/local/shortread_hostremoval.nf | 29 +++++------ workflows/taxprofiler.nf | 27 ++++++++-- 9 files changed, 72 insertions(+), 116 deletions(-) delete mode 100644 modules/nf-core/modules/samtools/flagstat/main.nf delete mode 100644 modules/nf-core/modules/samtools/flagstat/meta.yml diff --git a/conf/modules.config b/conf/modules.config index dc8b138..41faa62 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -132,7 +132,6 @@ process { ] } - withName: PORECHOP { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -142,6 +141,24 @@ process { ] } + withName: BOWTIE2_BUILD { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/build" }, + mode: 'copy', + pattern: '*.bt2' + ] + } + + withName: BOWTIE2_ALIGN { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/align" }, + mode: 'copy', + pattern: '*.{fastq.gz,bam}' + ] + } + withName: CAT_FASTQ { publishDir = [ path: { "${params.outdir}/prepared_sequences" }, diff --git a/conf/test.config b/conf/test.config index 92a10e4..90ea241 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,10 +22,12 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' - run_kraken2 = true - run_malt = true - shortread_clipmerge = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' + run_kraken2 = true + run_malt = true + shortread_clipmerge = true + shortread_hostremoval = true + shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' } diff --git a/modules.json b/modules.json index 7c3facc..7395d68 100644 --- a/modules.json +++ b/modules.json @@ -36,9 +36,6 @@ "porechop": { "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" }, - "samtools/flagstat": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" - }, "untar": { "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" } diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf deleted file mode 100644 index 9e3440a..0000000 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process SAMTOOLS_FLAGSTAT { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools \\ - flagstat \\ - --threads ${task.cpus-1} \\ - $bam \\ - > ${bam}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/modules/samtools/flagstat/meta.yml deleted file mode 100644 index 9526906..0000000 --- a/modules/nf-core/modules/samtools/flagstat/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type -keywords: - - stats - - mapping - - counts - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 8aa8722..e559a85 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,6 +66,7 @@ params { longread_clip = false // Host Removal + shortread_hostremoval = false shortread_hostremoval_reference = null shortread_hostremoval_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index fb2ca31..0b5162b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -266,8 +266,7 @@ "type": "boolean" }, "shortread_clipmerge_excludeunmerged": { - "type": "boolean", - "default": false + "type": "boolean" }, "longread_clip": { "type": "boolean" @@ -304,6 +303,17 @@ "shortread_clipmerge_minlength": { "type": "integer", "default": 15 + }, + "shortread_hostremoval": { + "type": "boolean" + }, + "shortread_hostremoval_reference": { + "type": "string", + "default": null + }, + "shortread_hostremoval_index": { + "type": "string", + "default": null } } } diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf index 4b0861c..505f989 100644 --- a/subworkflows/local/shortread_hostremoval.nf +++ b/subworkflows/local/shortread_hostremoval.nf @@ -2,38 +2,33 @@ // Remove host reads via alignment and export off-target reads // -include { BOWTIE2_ALIGN } from '../../../modules/nf-core/modules/bowtie2/align/main' -include { BOWTIE2_BUILD } from '../../../modules/nf-core/modules/bowtie2/build/main' -include { SAMTOOLS_VIEW } from '../../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_FASTQ } from '../../../modules/nf-core/modules/samtools/fastq/main' -include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/modules/samtools/flagstat/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' -workflow SHORTREAD_PREPROCESSING { +workflow SHORTREAD_HOSTREMOVAL { take: reads // [ [ meta ], [ reads ] ] reference // /path/to/fasta + index // /path/to/index main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() if ( !params.shortread_hostremoval_index ) { - file( , checkIfExists: true ) - BOWTIE2_BUILD ( reference ) - ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + ch_bowtie2_index = BOWTIE2_BUILD ( reference ).index + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + } else { + ch_bowtie2_index = index.first() } - BOWTIE2_ALIGN ( reads, BOWTIE2_BUILD.out.index ) - ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.log ) - - SAMTOOLS_FLAGSTAT ( BOWTIE2_ALIGN.out.bam ) - ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.flagstat ) + BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true ) + ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log ) emit: reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index c5678f1..631aee6 100644 --- 
a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -23,8 +23,11 @@ if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-cor if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" // TODO Add check if index but no reference exit 1 -if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { } -if (params.shortread_hostremoval_index) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } +if (params.shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." } +if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." } + +if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { ch_reference = [] } +if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -49,6 +52,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -120,17 +124,24 @@ workflow TAXPROFILER { if ( params.longread_clip ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } - ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) - } else { + ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) + } else {SHORTREAD_HOSTREMOVAL ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } + if ( params.shortread_hostremoval ) { + ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_preprocessed, ch_reference, ch_reference_index ).reads + ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions.first()) + } else { + ch_shortreads_hostremoved = ch_shortreads_preprocessed + } + /* COMBINE READS WITH POSSIBLE DATABASES */ // e.g. 
output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = ch_shortreads_preprocessed + ch_input_for_profiling = ch_shortreads_hostremoved .mix( ch_longreads_preprocessed ) .combine(DB_CHECK.out.dbs) .branch { @@ -196,9 +207,15 @@ workflow TAXPROFILER { if (params.shortread_clipmerge) { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc) } + + if (params.shortread_hostremoval) { + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) + } + if (params.longread_clip) { ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) } + if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first()) From a76576c16b44be09548f8f8c6bede81aef2a4d99 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 3 Apr 2022 17:24:50 +0200 Subject: [PATCH 03/25] Prettier --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 398c351..cd1a0a9 100644 --- a/modules.json +++ b/modules.json @@ -44,4 +44,4 @@ } } } -} \ No newline at end of file +} From 7b08c49cd6cb6471384a26c1202733cad0fe58ae Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 11:54:54 +0200 Subject: [PATCH 04/25] Re-add run merging and gonna let GHA see if it works >.> --- .github/workflows/ci.yml | 1 + nextflow.config | 2 ++ workflows/taxprofiler.nf | 25 +++++++++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79148f0..7678645 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,7 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" + - "--run_merging" steps: - name: Check out pipeline code diff --git a/nextflow.config b/nextflow.config index 19cc823..1c69d36 100644 --- a/nextflow.config +++ b/nextflow.config @@ -76,6 +76,8 @@ params { shortread_complexityfilter_prinseqplusplus_dustscore = 0.5 save_complexityfiltered_reads = false + // run merging + run_merging = false // MALT run_malt = false diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 3b08402..61eda6e 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -125,13 +125,34 @@ workflow TAXPROFILER { ch_shortreads_filtered = ch_shortreads_preprocessed } + /* + STEP: Run merging + */ + + if ( params.run_merging ) { + ch_reads_for_cat = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['run_accession'].remove() + [ meta_new, reads ] + } + .groupTuple() + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ) + + } else { + ch_reads_runmerged = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + } + /* COMBINE READS WITH POSSIBLE DATABASES */ // e.g. 
output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = ch_shortreads_filtered - .mix( ch_longreads_preprocessed ) + ch_input_for_profiling = ch_reads_runmerged .combine(DB_CHECK.out.dbs) .branch { malt: it[2]['tool'] == 'malt' From 74c496f6af04e15e1625d7311791004a645b3a21 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 11:58:16 +0200 Subject: [PATCH 05/25] Fix CAT_FASTQ output --- workflows/taxprofiler.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 61eda6e..2d0c17d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -140,8 +140,7 @@ workflow TAXPROFILER { } .groupTuple() - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ) - + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ).reads } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) @@ -247,6 +246,10 @@ workflow TAXPROFILER { ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions ) } + if (params.run_merging){ + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + } + if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) From 4d726a87e98f56bf1a4e0d52259d9e83f89539ce Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 12:01:07 +0200 Subject: [PATCH 06/25] Fix metadata removal --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 2d0c17d..33d9725 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -135,7 +135,7 @@ workflow TAXPROFILER { .map { meta, reads -> def meta_new = meta.clone() - meta_new['run_accession'].remove() + meta_new.remove('run_accession') [ meta_new, reads ] } .groupTuple() From d130a72d74899c3fd85db3c1c751b7b6848fd031 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:09:23 +0200 Subject: [PATCH 07/25] Get this working --- .github/workflows/ci.yml | 1 + conf/modules.config | 6 +++--- subworkflows/local/shortread_fastp.nf | 4 ++-- workflows/taxprofiler.nf | 18 ++++++++++++++++-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7678645..53423cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,7 @@ jobs: - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" - "--run_merging" + - "--run_merging --shortread_clipmerge_mergepairs" steps: - name: Check out pipeline code diff --git a/conf/modules.config b/conf/modules.config index b59850f..7602e3f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -163,7 +163,7 @@ process { withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +173,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = { 
"${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -183,7 +183,7 @@ process { withName: METAPHLAN3 { publishDir = [ - path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 18baf17..04057b1 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -28,8 +28,8 @@ workflow SHORTREAD_FASTP { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] + meta_new['single_end'] = true + [ meta_new, reads.flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 33d9725..e04d4d6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -130,7 +130,8 @@ workflow TAXPROFILER { */ if ( params.run_merging ) { - ch_reads_for_cat = ch_shortreads_filtered + + ch_reads_for_cat_branch = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) .map { meta, reads -> @@ -139,8 +140,21 @@ workflow TAXPROFILER { [ meta_new, reads ] } .groupTuple() + .map { + meta, reads -> + [ meta, reads.flatten() ] + } + .branch { + // we can't concate files if there is not a second run, we branch + // here to separate them out, and mix after + cat: ( it[0]['single_end'] && it[1].size() > 1 ) || ( !it[0]['single_end'] && it[1].size() > 2 ) + skip: true + } + + ch_reads_for_cat_branch.cat.dump(tag: "for_catting") + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads.mix( ch_reads_for_cat_branch.skip ) - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ).reads } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) From ca011ccc5b363ea9d6c1eaf44713a9c09e471f39 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:28:37 +0200 Subject: [PATCH 08/25] Fix cat_fastq naming logic? --- conf/modules.config | 19 +++++++++++++++---- nextflow.config | 1 + nextflow_schema.json | 38 +++++++++++++++++++++++++++++--------- workflows/taxprofiler.nf | 4 ---- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7602e3f..97e9510 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -161,9 +161,19 @@ process { ] } + withName: CAT_FASTQ { + ext.prefix = { "${meta.id}-${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/run_merging/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_runmerged_reads + ] + } + withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.run_merging ? 
{ "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +183,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -182,12 +192,13 @@ process { } withName: METAPHLAN3 { + ext.args = { "${meta.db_params}" } + ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/nextflow.config b/nextflow.config index 1c69d36..d969ed9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,6 +78,7 @@ params { // run merging run_merging = false + save_runmerged_reads = false // MALT run_malt = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 6858409..0b4b4fb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -288,7 +298,10 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -309,8 +322,7 @@ "default": 15 }, "save_preprocessed_reads": { - "type": "boolean", - "default": false + "type": "boolean" }, "shortread_complexityfilter_tool": { "type": "string", @@ -333,15 +345,23 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"] + "enum": [ + "entropy", + "dust" + ] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", "default": 0.5 }, "save_complexityfiltered_reads": { - "type": "boolean", - "default": false + "type": "boolean" + }, + "run_merging": { + "type": "boolean" + }, + "save_runmerged_reads": { + "type": "boolean" } } -} +} \ No newline at end of file diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index e04d4d6..bdb93ab 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -140,10 +140,6 @@ workflow TAXPROFILER { [ meta_new, reads ] } .groupTuple() - .map { - meta, reads -> - [ meta, reads.flatten() ] - } .branch { // we can't concate files if there is not a second run, we branch // here to separate them out, and mix after From a634814d848a68252d81b231e34942f5fa616c83 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:34:00 +0200 Subject: [PATCH 09/25] Formatting and fix fastp output --- nextflow_schema.json | 26 ++++--------------- .../local/shortread_adapterremoval.nf | 2 +- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 0b4b4fb..64836df 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -298,10 +288,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -345,10 +332,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ] + "enum": ["entropy", "dust"] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -364,4 +348,4 @@ "type": "boolean" } } -} \ No newline at end of file +} diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index b573be9..a7948e7 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, fastq.flatten()] } + .map { meta, fastq -> [meta, [ fastq ].flatten()] } CAT_FASTQ(ch_concat_fastq) From 6c14f2b230a6c6df99e89c44ed8904cb0a5a7b59 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:44:52 +0200 Subject: [PATCH 10/25] Remove the flattening? --- subworkflows/local/shortread_adapterremoval.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index a7948e7..735d3b8 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, [ fastq ].flatten()] } + .map { meta, fastq -> [meta, fastq] } CAT_FASTQ(ch_concat_fastq) From 35cb6e042acf3fbe34c9664bebb9c27ef0d96179 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:46:12 +0200 Subject: [PATCH 11/25] Flatten the right thing --- subworkflows/local/shortread_adapterremoval.nf | 2 +- subworkflows/local/shortread_fastp.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 735d3b8..b573be9 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, fastq] } + .map { meta, fastq -> [meta, fastq.flatten()] } CAT_FASTQ(ch_concat_fastq) diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 04057b1..4626691 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -29,7 +29,7 @@ workflow SHORTREAD_FASTP { meta, reads -> def meta_new = meta.clone() meta_new['single_end'] = true - [ meta_new, reads.flatten() ] + [ meta_new, [ reads ].flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) From 8839fe22b8856fb1d69e11e2e1e0a4367cb97dc3 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 20:02:22 +0200 Subject: [PATCH 12/25] Fix output tuple for reads --- 
workflows/taxprofiler.nf | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index bdb93ab..7d5f60f 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -149,7 +149,15 @@ workflow TAXPROFILER { ch_reads_for_cat_branch.cat.dump(tag: "for_catting") - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads.mix( ch_reads_for_cat_branch.skip ) + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads + .mix( ch_reads_for_cat_branch.skip ) + .map { + meta, reads -> + + [ meta, [ reads ].flatten() ] + } + + ch_reads_runmerged.dump(tag: "ch_reads_runmerged" ) } else { ch_reads_runmerged = ch_shortreads_filtered From afb66e445fcb2f9c60f4152e1184f5f8600a843c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 21:36:59 +0200 Subject: [PATCH 13/25] Append pairment to ID at profiling to prevent multiqc-level filename crash --- workflows/taxprofiler.nf | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 7d5f60f..1da812d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -147,8 +147,6 @@ workflow TAXPROFILER { skip: true } - ch_reads_for_cat_branch.cat.dump(tag: "for_catting") - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads .mix( ch_reads_for_cat_branch.skip ) .map { @@ -157,8 +155,6 @@ workflow TAXPROFILER { [ meta, [ reads ].flatten() ] } - ch_reads_runmerged.dump(tag: "ch_reads_runmerged" ) - } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) @@ -168,8 +164,15 @@ workflow TAXPROFILER { COMBINE READS WITH POSSIBLE DATABASES */ - // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] + // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], [ /2612.merged.fastq.gz ], ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = ch_reads_runmerged + .map { + meta, reads -> + def meta_new = meta.clone() + pairtype = meta_new['single_end'] ? 
'_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .combine(DB_CHECK.out.dbs) .branch { malt: it[2]['tool'] == 'malt' From 29073e3c93e1ea5a4221ca8ec2080547716b503c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 11 Apr 2022 13:38:42 +0200 Subject: [PATCH 14/25] Update CI tests --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79148f0..5e57889 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,7 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" + - "--shortread_complexityfilter false --shortread_hostremoval" steps: - name: Check out pipeline code From 78182c2a8ca72d17b3a1004f100180d44439db42 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 11 Apr 2022 13:40:24 +0200 Subject: [PATCH 15/25] Add comma --- workflows/taxprofiler.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index abb330d..95e3588 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -125,7 +125,7 @@ workflow TAXPROFILER { } else { ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } - + /* SUBWORKFLOW: COMPLEXITY FILTERING */ @@ -146,12 +146,12 @@ workflow TAXPROFILER { } else { ch_shortreads_hostremoved = ch_shortreads_filtered } - + /* SUBWORKFLOW: PROFILING */ - PROFILING ( ch_shortreads_hostremoved ch_longreads_preprocessed, DB_CHECK.out.dbs ) + PROFILING ( ch_shortreads_hostremoved, ch_longreads_preprocessed, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) /* @@ -191,7 +191,7 @@ workflow TAXPROFILER { if (params.shortread_hostremoval) { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) } - + ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc ) // TODO create multiQC module for metaphlan From a5f4fc42d53fce99863a3d135b017b9e468722be Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 09:25:27 +0200 Subject: [PATCH 16/25] Fix run merging for unmerged PE data --- conf/modules.config | 2 +- workflows/taxprofiler.nf | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 97e9510..eb448bb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -162,7 +162,7 @@ process { } withName: CAT_FASTQ { - ext.prefix = { "${meta.id}-${meta.run_accession}" } + ext.prefix = { "${meta.id}" } publishDir = [ path: { "${params.outdir}/run_merging/" }, mode: params.publish_dir_mode, diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 1da812d..2eb7e8c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -139,7 +139,11 @@ workflow TAXPROFILER { meta_new.remove('run_accession') [ meta_new, reads ] } - .groupTuple() + .groupTuple(by: 0) + .map { + meta, reads -> + [ meta, reads.flatten() ] + } .branch { // we can't concate files if there is not a second run, we branch // here to separate them out, and mix after @@ -151,7 +155,6 @@ workflow TAXPROFILER { .mix( ch_reads_for_cat_branch.skip ) .map { meta, reads -> - [ meta, [ reads ].flatten() ] } From 9f221f84cc66ca121a46d8f68db8f727dec523c9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:12:17 +0200 Subject: [PATCH 17/25] Only supply single input channel to 
profiling, as these are merged into single input channel at run_merging --- subworkflows/local/profiling.nf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index c74c583..07b6b72 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,8 +9,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta workflow PROFILING { take: - shortreads // [ [ meta ], [ reads ] ] - longreads // [ [ meta ], [ reads ] ] + reads // [ [ meta ], [ reads ] ] databases // [ [ meta ], path ] main: @@ -22,9 +21,9 @@ workflow PROFILING { */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = shortreads - .mix( longreads ) + ch_input_for_profiling = reads .combine(databases) + .dump(tag: "combined_withdbs") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 2ef21c6ef3e46ae824ad5c99970b8a5250ef8e38 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:14:05 +0200 Subject: [PATCH 18/25] Fix input to profiling --- subworkflows/local/profiling.nf | 1 - workflows/taxprofiler.nf | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 07b6b72..ac744aa 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,7 +23,6 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) - .dump(tag: "combined_withdbs") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 0916cac..7c02f4c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -167,7 +167,7 @@ workflow TAXPROFILER { SUBWORKFLOW: PROFILING */ - PROFILING ( ch_reads_runmerged, ch_longreads_preprocessed, DB_CHECK.out.dbs ) + PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) /* From 26399718b2440499f294ebc59d669dd78dd7cdb6 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:46:03 +0200 Subject: [PATCH 19/25] Re-add pairment attachment --- subworkflows/local/profiling.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index ac744aa..b03b83e 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,6 +23,12 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) + meta, reads -> + def meta_new = meta.clone() + pairtype = meta_new['single_end'] ? 
'_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 8d689141924a52cf72e666e76db1b575d541a341 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:47:44 +0200 Subject: [PATCH 20/25] Re-add operator name --- subworkflows/local/profiling.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index b03b83e..8a156c2 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,6 +23,7 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) + .map { meta, reads -> def meta_new = meta.clone() pairtype = meta_new['single_end'] ? '_se' : '_pe' From a15c45b00cd18e2148baf7112f15c340261acdef Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:51:27 +0200 Subject: [PATCH 21/25] Put map in the rigt place --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 8a156c2..18de739 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -22,7 +22,6 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads - .combine(databases) .map { meta, reads -> def meta_new = meta.clone() @@ -30,6 +29,7 @@ workflow PROFILING { meta_new['id'] = meta_new['id'] + pairtype [meta_new, reads] } + .combine(databases) .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 16a3556bfcfddf8195bbcd486653325aaa1de4ee Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 13 Apr 2022 08:26:08 +0200 Subject: [PATCH 22/25] Changes after code review --- workflows/taxprofiler.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 7c02f4c..58671b3 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -139,15 +139,16 @@ workflow TAXPROFILER { meta_new.remove('run_accession') [ meta_new, reads ] } - .groupTuple(by: 0) + .groupTuple() .map { meta, reads -> [ meta, reads.flatten() ] } .branch { + meta, reads -> // we can't concatenate files if there is not a second run, we branch // here to separate them out, and mix back in after for efficiency - cat: ( it[0]['single_end'] && it[1].size() > 1 ) || ( !it[0]['single_end'] && it[1].size() > 2 ) + cat: ( meta.single_end && reads.size() > 1 ) || ( !meta.single_end && reads.size() > 2 ) skip: true } From dfeaa0d1fe79a6f59c27bdb481904b3bbb620234 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 13 Apr 2022 12:00:28 +0200 Subject: [PATCH 23/25] Rename subworkflow parameters for consistency --- .github/workflows/ci.yml | 8 ++++---- conf/modules.config | 6 +++--- conf/test.config | 6 +++--- nextflow.config | 11 ++++++----- nextflow_schema.json | 22 +++++++++++----------- workflows/taxprofiler.nf | 16 ++++++++-------- 6 files changed, 35 insertions(+), 34 deletions(-) diff --git a/.github/workflows/ci.yml 
From dfeaa0d1fe79a6f59c27bdb481904b3bbb620234 Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Wed, 13 Apr 2022 12:00:28 +0200
Subject: [PATCH 23/25] Rename subworkflow parameters for consistency

---
 .github/workflows/ci.yml |  8 ++++----
 conf/modules.config      |  6 +++---
 conf/test.config         |  6 +++---
 nextflow.config          | 11 ++++++-----
 nextflow_schema.json     | 22 +++++++++++-----------
 workflows/taxprofiler.nf | 16 ++++++++--------
 6 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 53423cb..c373bc8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,8 +29,8 @@ jobs:
           - NXF_VER: ""
             NXF_EDGE: "1"
         parameters:
-          - "--longread_clip false"
-          - "--shortread_clip false"
+          - "--perform_longread_clip false"
+          - "--perform_shortread_clipmerge false"
           - "--shortread_clipmerge_tool fastp"
           - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
           - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
@@ -39,8 +39,8 @@ jobs:
           - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
          - "--shortread_complexityfilter_tool bbduk"
          - "--shortread_complexityfilter_tool prinseq"
-          - "--run_merging"
-          - "--run_merging --shortread_clipmerge_mergepairs"
+          - "--perform_runmerging"
+          - "--perform_runmerging --shortread_clipmerge_mergepairs"

     steps:
       - name: Check out pipeline code
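The CI matrix above exercises the renamed switches from the command line; in a user-supplied configuration the same toggles would look roughly like the snippet below (a sketch with illustrative values, passed for example via -c custom.config; the tool-specific options keep their existing names):

    // custom.config: illustrative values only
    params {
        perform_shortread_clipmerge        = true
        shortread_clipmerge_tool           = 'fastp'
        perform_longread_clip              = true
        perform_shortread_complexityfilter = true
        shortread_complexityfilter_tool    = 'bbduk'
        perform_runmerging                 = true
        save_runmerged_reads               = false
    }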
diff --git a/conf/modules.config b/conf/modules.config
index d93486f..42528de 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -173,7 +173,7 @@ process {
     withName: MALT_RUN {
         ext.args = { "${meta.db_params}" }
-        ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/malt/${meta.db_name}" },
             mode: params.publish_dir_mode,
@@ -183,7 +183,7 @@ process {
     withName: KRAKEN2_KRAKEN2 {
         ext.args = { "${meta.db_params}" }
-        ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
             mode: params.publish_dir_mode,
@@ -193,7 +193,7 @@ process {
     withName: METAPHLAN3 {
         ext.args = { "${meta.db_params}" }
-        ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/metaphlan3/${meta.db_name}" },
             mode: params.publish_dir_mode,
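All three ext.prefix changes in this file follow one pattern: both arms of the ternary stay closures, so the prefix is still resolved per task against that task's meta map, and params.perform_runmerging only decides whether run_accession appears in output names. In isolation the pattern looks like this (the EXAMPLE_PROFILER selector is hypothetical, and the fragment assumes an nf-core-style module in which meta is defined as a process input):

    // illustrative fragment, not the pipeline's conf/modules.config
    process {
        withName: EXAMPLE_PROFILER {
            // both arms are closures, so they are evaluated per task with that task's meta map
            ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        }
    }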
diff --git a/conf/test.config b/conf/test.config
index 6e82300..923dda7 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -28,7 +28,7 @@ params {
     run_malt = true
     run_metaphlan3 = true
     run_centrifuge = true
-    shortread_clipmerge = true
-    longread_clip = false
-    shortread_complexityfilter = true
+    perform_shortread_clipmerge = true
+    perform_longread_clip = false
+    perform_shortread_complexityfilter = true
 }
diff --git a/nextflow.config b/nextflow.config
index da8bbdb..b72b4f9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -55,7 +55,7 @@ params {
     databases = null

     // FASTQ preprocessing
-    shortread_clipmerge = false
+    perform_shortread_clipmerge = false
     shortread_clipmerge_tool = 'fastp'
     shortread_clipmerge_skipadaptertrim = false
     shortread_clipmerge_mergepairs = false
@@ -63,11 +63,11 @@ params {
     shortread_clipmerge_adapter1 = null
     shortread_clipmerge_adapter2 = null
     shortread_clipmerge_minlength = 15
-    longread_clip = false
+    perform_longread_clip = false
     save_preprocessed_reads = false

     // Complexity filtering
-    shortread_complexityfilter = false
+    perform_shortread_complexityfilter = false
     shortread_complexityfilter_tool = 'bbduk'
     shortread_complexityfilter_entropy = 0.3
     shortread_complexityfilter_bbduk_windowsize = 50
@@ -77,8 +77,8 @@ params {
     save_complexityfiltered_reads = false

     // run merging
-    run_merging = false
-    save_runmerged_reads = false
+    perform_runmerging = false
+    save_runmerged_reads = false

     // MALT
     run_malt = false
@@ -92,6 +92,7 @@ params {
     centrifuge_save_unaligned = false
     centrifuge_save_aligned = false
     centrifuge_sam_format = false
+
     // metaphlan3
     run_metaphlan3 = false
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4db7fa0..06bd94b 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -262,15 +262,9 @@
             "type": "string",
             "default": "None"
         },
-        "shortread_clipmerge": {
-            "type": "boolean"
-        },
         "shortread_clipmerge_excludeunmerged": {
             "type": "boolean"
         },
-        "longread_clip": {
-            "type": "boolean"
-        },
         "run_malt": {
             "type": "boolean"
         },
@@ -334,9 +328,6 @@
         "shortread_complexityfilter_bbduk_mask": {
             "type": "boolean"
         },
-        "shortread_complexityfilter": {
-            "type": "boolean"
-        },
         "shortread_complexityfilter_entropy": {
             "type": "number",
             "default": 0.3
@@ -353,10 +344,19 @@
         "save_complexityfiltered_reads": {
             "type": "boolean"
         },
-        "run_merging": {
             "type": "boolean"
         },
-        "save_runmerged_reads": {
+        "save_runmerged_reads": {
             "type": "boolean"
         },
+        "perform_shortread_clipmerge": {
+            "type": "boolean"
+        },
+        "perform_longread_clip": {
+            "type": "boolean"
+        },
+        "perform_shortread_complexityfilter": {
+            "type": "boolean"
+        },
+        "perform_runmerging": {
             "type": "boolean"
         }
     }
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 58671b3..f086557 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -101,14 +101,14 @@ workflow TAXPROFILER {
     /*
         SUBWORKFLOW: PERFORM PREPROCESSING
     */
-    if ( params.shortread_clipmerge ) {
+    if ( params.perform_shortread_clipmerge ) {
         ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
     } else {
         ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
     }

-    if ( params.longread_clip ) {
+    if ( params.perform_longread_clip ) {
         ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
                                         .map { it -> [ it[0], [it[1]] ] }
     } else {
         ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
     }

     /*
@@ -119,7 +119,7 @@ workflow TAXPROFILER {
         SUBWORKFLOW: COMPLEXITY FILTERING
     */

-    if ( params.shortread_complexityfilter ) {
+    if ( params.perform_shortread_complexityfilter ) {
         ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
     } else {
         ch_shortreads_filtered = ch_shortreads_preprocessed
@@ -129,7 +129,7 @@ workflow TAXPROFILER {
         STEP: Run merging
     */

-    if ( params.run_merging ) {
+    if ( params.perform_runmerging ) {
         ch_reads_for_cat_branch = ch_shortreads_filtered
             .mix( ch_longreads_preprocessed )
@@ -190,22 +190,22 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))

-    if (params.shortread_clipmerge) {
+    if (params.perform_shortread_clipmerge) {
         ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
     }

-    if (params.longread_clip) {
+    if (params.perform_longread_clip) {
         ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
     }

-    if (params.shortread_complexityfilter){
+    if (params.perform_shortread_complexityfilter){
         ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }

-    if (params.run_merging){
+    if (params.perform_runmerging){
         ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
     }

From aa2d07c42ae4e451711a4147264782d567576e88 Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Wed, 13 Apr 2022 14:19:19 +0200
Subject: [PATCH 24/25] Fix merge cockup

---
 .github/workflows/ci.yml |  2 +-
 conf/test.config         |  2 +-
 nextflow.config          |  2 +-
 nextflow_schema.json     |  2 +-
 workflows/taxprofiler.nf | 13 +++++--------
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b18e601..a1ece72 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,7 +41,7 @@ jobs:
           - "--shortread_complexityfilter_tool prinseq"
           - "--perform_runmerging"
           - "--perform_runmerging --shortread_clipmerge_mergepairs"
-          - "--shortread_complexityfilter false --shortread_hostremoval"
+          - "--shortread_complexityfilter false --perform_shortread_hostremoval"

     steps:
       - name: Check out pipeline code
diff --git a/conf/test.config b/conf/test.config
index 1d08d91..8f12312 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -31,6 +31,6 @@ params {
     perform_shortread_clipmerge = true
     perform_longread_clip = false
     perform_shortread_complexityfilter = true
-    shortread_hostremoval = true
+    perform_shortread_hostremoval = true
     shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
 }
diff --git a/nextflow.config b/nextflow.config
index 6b0a79d..94c5837 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -81,7 +81,7 @@ params {
     save_runmerged_reads = false

     // Host Removal
-    shortread_hostremoval = false
+    perform_shortread_hostremoval = false
     shortread_hostremoval_reference = null
     shortread_hostremoval_index = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 07295e3..cf0edab 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -359,7 +359,7 @@
         "perform_runmerging": {
             "type": "boolean"
         },
-        "shortread_hostremoval": {
+        "perform_shortread_hostremoval": {
             "type": "boolean"
         },
         "shortread_hostremoval_reference": {
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 894a1e1..9fe8cc8 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -22,11 +22,10 @@ if (params.databases) { ch_databases = file(params.databases) } else { exit 1, '
 if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
 if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"

-// TODO Add check if index but no reference exit 1
-if (params.shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
+if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
 if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }

-if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { ch_reference = [] }
+if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) }
 if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] }

 /*
@@ -140,15 +139,13 @@ workflow TAXPROFILER {
         SUBWORKFLOW: HOST REMOVAL
     */

-    if ( params.shortread_hostremoval ) {
+    if ( params.perform_shortread_hostremoval ) {
         ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_reference_index ).reads
         ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions.first())
     } else {
         ch_shortreads_hostremoved = ch_shortreads_filtered
     }

-    */
-
     if ( params.perform_runmerging ) {

         ch_reads_for_cat_branch = ch_shortreads_hostremoved
@@ -225,11 +222,11 @@ workflow TAXPROFILER {
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }

-    if (params.shortread_hostremoval) {
+    if (params.perform_shortread_hostremoval) {
         ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
         ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions)
     }
-
+
     if (params.perform_runmerging){
         ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
     }

From 5e80df0f949557883ea6e961b0ea7ba3214065c5 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Wed, 13 Apr 2022 15:27:53 +0200
Subject: [PATCH 25/25] Apply suggestions from code review

---
 conf/test.config | 4 ++--
 nextflow.config  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 8f12312..9fa5de8 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -31,6 +31,6 @@ params {
     perform_shortread_clipmerge = true
     perform_longread_clip = false
     perform_shortread_complexityfilter = true
-    perform_shortread_hostremoval = true
-    shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+    perform_shortread_hostremoval      = true
+    shortread_hostremoval_reference    = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
 }
diff --git a/nextflow.config b/nextflow.config
index 94c5837..bf3ca92 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -81,7 +81,7 @@ params {
     save_runmerged_reads = false

     // Host Removal
-    perform_shortread_hostremoval = false
+    perform_shortread_hostremoval          = false
     shortread_hostremoval_reference = null
     shortread_hostremoval_index = null
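The last two patches leave short-read host removal controlled by three parameters: a perform_shortread_hostremoval switch, a reference FASTA that is mandatory whenever the switch is on, and an optional prebuilt index. A condensed, standalone sketch of the guard logic follows; it is not the pipeline's exact code (the paths are placeholders, the error wording here uses the renamed flag, and the empty-list fallbacks are written as ternaries):

    nextflow.enable.dsl = 2

    // placeholder defaults; on a real run these come from the command line or a config
    params.perform_shortread_hostremoval   = true
    params.shortread_hostremoval_reference = 'host_genome.fasta'
    params.shortread_hostremoval_index     = null

    if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) {
        exit 1, "error: --perform_shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied"
    }
    if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_index) {
        exit 1, "error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied"
    }

    ch_reference       = params.shortread_hostremoval_reference ? file(params.shortread_hostremoval_reference) : []
    ch_reference_index = params.shortread_hostremoval_index     ? file(params.shortread_hostremoval_index)     : []

    workflow {
        // a real pipeline would pass these on, e.g. SHORTREAD_HOSTREMOVAL ( reads, ch_reference, ch_reference_index )
        println "host-removal reference: ${ch_reference ?: 'none'}; index: ${ch_reference_index ?: 'none'}"
    }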