From a821f748ee89ade6e229edab28fa7f3399969596 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 09:28:46 +0200 Subject: [PATCH 01/17] Add modules --- conf/modules.config | 41 ++++++++++++ modules.json | 14 +++- modules/nf-core/modules/metaphlan3/main.nf | 2 +- .../nf-core/modules/minimap2/align/main.nf | 48 ++++++++++++++ .../nf-core/modules/minimap2/align/meta.yml | 65 +++++++++++++++++++ .../nf-core/modules/minimap2/index/main.nf | 33 ++++++++++ .../nf-core/modules/minimap2/index/meta.yml | 30 +++++++++ .../nf-core/modules/samtools/bam2fq/main.nf | 56 ++++++++++++++++ .../nf-core/modules/samtools/bam2fq/meta.yml | 55 ++++++++++++++++ modules/nf-core/modules/samtools/view/main.nf | 44 +++++++++++++ .../nf-core/modules/samtools/view/meta.yml | 57 ++++++++++++++++ nf-core/modules/samtools/bam2fq/main.nf | 56 ++++++++++++++++ nf-core/modules/samtools/bam2fq/meta.yml | 55 ++++++++++++++++ 13 files changed, 554 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/modules/minimap2/align/main.nf create mode 100644 modules/nf-core/modules/minimap2/align/meta.yml create mode 100644 modules/nf-core/modules/minimap2/index/main.nf create mode 100644 modules/nf-core/modules/minimap2/index/meta.yml create mode 100644 modules/nf-core/modules/samtools/bam2fq/main.nf create mode 100644 modules/nf-core/modules/samtools/bam2fq/meta.yml create mode 100644 modules/nf-core/modules/samtools/view/main.nf create mode 100644 modules/nf-core/modules/samtools/view/meta.yml create mode 100644 nf-core/modules/samtools/bam2fq/main.nf create mode 100644 nf-core/modules/samtools/bam2fq/meta.yml diff --git a/conf/modules.config b/conf/modules.config index d8fb382..b707954 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -164,6 +164,47 @@ process { ] } + withName: MINIMAP2_INDEX { + ext.args = '-x map-ont' + publishDir = [ + path: { "${params.outdir}/minimap2/index" }, + mode: params.publish_dir_mode, + enabled: params.save_minimap2_hostremoval_index, + pattern: 'minimap2' + ] + } + + withName: MINIMAP2_ALIGN { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/minimap2/align" }, + mode: params.publish_dir_mode, + enabled: params.save_minimap2_hostremoval_mapped, + pattern: '*.bam' + ] + } + + withName: SAMTOOLS_VIEW { + ext.args = '-f 4' + ext.prefix = { "${meta.id}.mapped.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools/view" }, + mode: params.publish_dir_mode, + enabled: params.save_samtools_unmapped_bam, + pattern: '*.bam' + ] + } + + withName: SAMTOOLS_BAM2FQ { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/samtools/bam2fq" }, + mode: params.publish_dir_mode, + enabled: params.save_minimap2_unmapped_fq, + pattern: '*.fq.gz' + ] + } + withName: BBMAP_BBDUK { ext.args = [ "entropy=${params.shortread_complexityfilter_entropy}", diff --git a/modules.json b/modules.json index a65926c..071234d 100644 --- a/modules.json +++ b/modules.json @@ -52,6 +52,12 @@ "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece" }, "metaphlan3": { + "git_sha": "ed4dd1a928ebf4308efb720de878045f7773f8e2" + }, + "minimap2/align": { + "git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b" + }, + "minimap2/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "multiqc": { @@ -63,9 +69,15 @@ "prinseqplusplus": { "git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b" }, + "samtools/bam2fq": { + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + }, + "samtools/view": { + "git_sha": "12afb6b0faf3cabf769c9a2a7dd477e3f066eac0" + }, "untar": { "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/metaphlan3/main.nf b/modules/nf-core/modules/metaphlan3/main.nf index 3fc6b27..bff0eb9 100644 --- a/modules/nf-core/modules/metaphlan3/main.nf +++ b/modules/nf-core/modules/metaphlan3/main.nf @@ -23,7 +23,7 @@ process METAPHLAN3 { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input_type = ("$input".endsWith(".fastq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam" + def input_type = ("$input".endsWith(".fastq.gz") || "$input".endsWith(".fq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam" def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input" def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt" diff --git a/modules/nf-core/modules/minimap2/align/main.nf b/modules/nf-core/modules/minimap2/align/main.nf new file mode 100644 index 0000000..08ac6ee --- /dev/null +++ b/modules/nf-core/modules/minimap2/align/main.nf @@ -0,0 +1,48 @@ +process MINIMAP2_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::minimap2=2.21 bioconda::samtools=1.12' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : + 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + + input: + tuple val(meta), path(reads) + path reference + val bam_format + val cigar_paf_format + val cigar_bam + + output: + tuple val(meta), path("*.paf"), optional: true, emit: paf + tuple val(meta), path("*.bam"), optional: true, emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_reads = meta.single_end ? "$reads" : "${reads[0]} ${reads[1]}" + def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + """ + minimap2 \\ + $args \\ + -t $task.cpus \\ + $reference \\ + $input_reads \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/minimap2/align/meta.yml b/modules/nf-core/modules/minimap2/align/meta.yml new file mode 100644 index 0000000..991b39a --- /dev/null +++ b/modules/nf-core/modules/minimap2/align/meta.yml @@ -0,0 +1,65 @@ +name: minimap2_align +description: A versatile pairwise aligner for genomic and spliced nucleotide sequences +keywords: + - align + - fasta + - fastq + - genome + - paf + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - reference: + type: file + description: | + Reference database in FASTA format. + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" + - bam: + type: file + description: Alignment in BAM format + pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" diff --git a/modules/nf-core/modules/minimap2/index/main.nf b/modules/nf-core/modules/minimap2/index/main.nf new file mode 100644 index 0000000..3dfeb86 --- /dev/null +++ b/modules/nf-core/modules/minimap2/index/main.nf @@ -0,0 +1,33 @@ +process MINIMAP2_INDEX { + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::minimap2=2.21' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/minimap2:2.21--h5bf99c6_0' : + 'quay.io/biocontainers/minimap2:2.21--h5bf99c6_0' }" + + input: + path fasta + + output: + path "*.mmi" , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + minimap2 \\ + -t $task.cpus \\ + -d ${fasta.baseName}.mmi \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/minimap2/index/meta.yml b/modules/nf-core/modules/minimap2/index/meta.yml new file mode 100644 index 0000000..3bf9f04 --- /dev/null +++ b/modules/nf-core/modules/minimap2/index/meta.yml @@ -0,0 +1,30 @@ +name: minimap2_index +description: Provides fasta index required by minimap2 alignment. +keywords: + - index + - fasta + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] +input: + - fasta: + type: file + description: | + Reference database in FASTA format. +output: + - mmi: + type: file + description: Minimap2 fasta index. + pattern: "*.mmi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" + - "@drpatelh" diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf new file mode 100644 index 0000000..554af48 --- /dev/null +++ b/modules/nf-core/modules/samtools/bam2fq/main.nf @@ -0,0 +1,56 @@ +process SAMTOOLS_BAM2FQ { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(inputbam) + val split + + output: + tuple val(meta), path("*.fq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if (split){ + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + -1 ${prefix}_1.fq.gz \\ + -2 ${prefix}_2.fq.gz \\ + -0 ${prefix}_other.fq.gz \\ + -s ${prefix}_singleton.fq.gz \\ + $inputbam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + $inputbam | gzip > ${prefix}_interleaved.fq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/samtools/bam2fq/meta.yml b/modules/nf-core/modules/samtools/bam2fq/meta.yml new file mode 100644 index 0000000..319a60c --- /dev/null +++ b/modules/nf-core/modules/samtools/bam2fq/meta.yml @@ -0,0 +1,55 @@ +name: samtools_bam2fq +description: | + The module uses bam2fq method from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: None + documentation: http://www.htslib.org/doc/1.1/samtools.html + tool_dev_url: None + doi: "" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - inputbam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - split: + type: boolean + description: | + TRUE/FALSE value to indicate if reads should be separated into + /1, /2 and if present other, or singleton. + Note: choosing TRUE will generate 4 different files. + Choosing FALSE will produce a single file, which will be interleaved in case + the input contains paired reads. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: | + FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) + or a single interleaved .fq.gz file if the user chooses not to split the reads. + pattern: "*.fq.gz" + +authors: + - "@lescai" diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/modules/samtools/view/main.nf new file mode 100644 index 0000000..11cfb74 --- /dev/null +++ b/modules/nf-core/modules/samtools/view/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input), path(index) + path fasta + + output: + tuple val(meta), path("*.bam") , emit: bam , optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta} -C" : "" + def file_type = input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + $args \\ + $input \\ + $args2 \\ + > ${prefix}.${file_type} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/view/meta.yml b/modules/nf-core/modules/samtools/view/meta.yml new file mode 100644 index 0000000..a8b43ec --- /dev/null +++ b/modules/nf-core/modules/samtools/view/meta.yml @@ -0,0 +1,57 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: optional file + description: BAM.BAI/CRAM.CRAI file + pattern: "*.{.bai,.crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: filtered/converted BAM/SAM file + pattern: "*.{bam,sam}" + - cram: + type: file + description: filtered/converted CRAM file + pattern: "*.cram" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" diff --git a/nf-core/modules/samtools/bam2fq/main.nf b/nf-core/modules/samtools/bam2fq/main.nf new file mode 100644 index 0000000..5d6aa79 --- /dev/null +++ b/nf-core/modules/samtools/bam2fq/main.nf @@ -0,0 +1,56 @@ +process SAMTOOLS_BAM2FQ { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(inputbam) + val split + + output: + tuple val(meta), path("*.fq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if (split){ + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + -1 ${prefix}_1.fq.gz \\ + -2 ${prefix}_2.fq.gz \\ + -0 ${prefix}_other.fq.gz \\ + -s ${prefix}_singleton.fq.gz \\ + $inputbam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + $inputbam >${prefix}_interleaved.fq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/nf-core/modules/samtools/bam2fq/meta.yml b/nf-core/modules/samtools/bam2fq/meta.yml new file mode 100644 index 0000000..319a60c --- /dev/null +++ b/nf-core/modules/samtools/bam2fq/meta.yml @@ -0,0 +1,55 @@ +name: samtools_bam2fq +description: | + The module uses bam2fq method from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: None + documentation: http://www.htslib.org/doc/1.1/samtools.html + tool_dev_url: None + doi: "" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - inputbam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - split: + type: boolean + description: | + TRUE/FALSE value to indicate if reads should be separated into + /1, /2 and if present other, or singleton. + Note: choosing TRUE will generate 4 different files. + Choosing FALSE will produce a single file, which will be interleaved in case + the input contains paired reads. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: | + FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) + or a single interleaved .fq.gz file if the user chooses not to split the reads. + pattern: "*.fq.gz" + +authors: + - "@lescai" From 5e6be52fab87d6b6a7f14ce254f63b02ad682a63 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 09:34:27 +0200 Subject: [PATCH 02/17] Implement hostremoval --- nextflow.config | 18 ++++++--- subworkflows/local/longread_hostremoval.nf | 47 ++++++++++++++++++++++ workflows/taxprofiler.nf | 17 ++++++-- 3 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 subworkflows/local/longread_hostremoval.nf diff --git a/nextflow.config b/nextflow.config index 5644786..04273f1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -81,12 +81,18 @@ params { save_runmerged_reads = false // Host Removal - perform_shortread_hostremoval = false - shortread_hostremoval_reference = null - shortread_hostremoval_index = null - save_hostremoval_index = false - save_hostremoval_mapped = false - save_hostremoval_unmapped = false + perform_shortread_hostremoval = false + shortread_hostremoval_reference = null + shortread_hostremoval_index = null + longread_hostremoval_index = null + save_hostremoval_index = false + save_hostremoval_mapped = false + save_hostremoval_unmapped = false + save_minimap2_hostremoval_index = false + save_minimap2_hostremoval_mapped = false + save_minimap2_hostremoval_unmapped = false + save_samtools_unmapped_bam = false + save_minimap2_unmapped_fq = false // MALT run_malt = false diff --git a/subworkflows/local/longread_hostremoval.nf b/subworkflows/local/longread_hostremoval.nf new file mode 100644 index 0000000..7db020b --- /dev/null +++ b/subworkflows/local/longread_hostremoval.nf @@ -0,0 +1,47 @@ +// +// Remove host reads via alignment and export off-target reads +// + +include { MINIMAP2_INDEX } from '../../modules/nf-core/modules/minimap2/index/main' +include { MINIMAP2_ALIGN } from '../../modules/nf-core/modules/minimap2/align/main' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/modules/samtools/view/main' +include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main' + +workflow LONGREAD_HOSTREMOVAL { + take: + reads // [ [ meta ], [ reads ] ] + reference // /path/to/fasta + index // /path/to/index + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( !params.longread_hostremoval_index ) { + ch_minimap2_index = MINIMAP2_INDEX ( reference ).index + ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions ) + } else { + ch_minimap2_index = index + } + + MINIMAP2_ALIGN ( reads, ch_minimap2_index, true, false, false ) + ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions.first() ) + ch_minimap2_mapped = MINIMAP2_ALIGN.out.bam + .map { + meta, reads -> + [ meta, reads, [] ] + } + + + SAMTOOLS_VIEW ( ch_minimap2_mapped , [] ) + ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() ) + + SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false ) + ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() ) + + + emit: + reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index a0046b2..9eb53a3 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -26,7 +26,8 @@ if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_refere if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." } if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } -if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } +if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] } +if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -52,6 +53,7 @@ include { DB_CHECK } from '../subworkflows/local/db_check' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' +include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval' include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' include { PROFILING } from '../subworkflows/local/profiling' @@ -141,16 +143,23 @@ workflow TAXPROFILER { */ if ( params.perform_shortread_hostremoval ) { - ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_reference_index ).reads + ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_shortread_reference_index ).reads ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions) } else { ch_shortreads_hostremoved = ch_shortreads_filtered } + if ( params.perform_longread_hostremoval ) { + ch_longreads_hostremoved = LONGREAD_HOSTREMOVAL ( ch_longreads_preprocessed, ch_reference, ch_longread_reference_index ).reads + ch_versions = ch_versions.mix(LONGREAD_HOSTREMOVAL.out.versions) + } else { + ch_longreads_hostremoved = ch_longreads_preprocessed + } + if ( params.perform_runmerging ) { ch_reads_for_cat_branch = ch_shortreads_hostremoved - .mix( ch_longreads_preprocessed ) + .mix( ch_longreads_hostremoved ) .map { meta, reads -> def meta_new = meta.clone() @@ -182,7 +191,7 @@ workflow TAXPROFILER { } else { ch_reads_runmerged = ch_shortreads_hostremoved - .mix( ch_longreads_preprocessed, INPUT_CHECK.out.fasta ) + .mix( ch_longreads_hostremoved, INPUT_CHECK.out.fasta ) } /* From afdebaf09a41cfc6fd3f3d3455f3f69b1e1dc608 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 10:15:11 +0200 Subject: [PATCH 03/17] Remove own modification to a module --- modules/nf-core/modules/samtools/bam2fq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf index 554af48..0dd1da9 100644 --- a/modules/nf-core/modules/samtools/bam2fq/main.nf +++ b/modules/nf-core/modules/samtools/bam2fq/main.nf @@ -45,7 +45,7 @@ process SAMTOOLS_BAM2FQ { bam2fq \\ $args \\ -@ $task.cpus \\ - $inputbam | gzip > ${prefix}_interleaved.fq.gz + $inputbam > ${prefix}_interleaved.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": From fa1c13263573e5cda53fb2468e8c339ec0ad0ecf Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 11:03:25 +0200 Subject: [PATCH 04/17] Update nextflow_schema --- nextflow_schema.json | 67 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f429d1b..d3878c1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -294,7 +304,10 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -335,7 +348,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"] + "enum": [ + "entropy", + "dust" + ] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -388,7 +404,14 @@ "kaiju_taxon_name": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"] + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ] }, "run_diamond": { "type": "boolean" @@ -396,7 +419,39 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"] + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ] + }, + "longread_hostremoval_index": { + "type": "string", + "default": null + }, + "save_minimap2_hostremoval_index": { + "type": "string", + "default": "false", + "description": "Flag for publishing minimap2 host removal index" + }, + "save_minimap2_hostremoval_mapped": { + "type": "string", + "default": "false", + "description": "Flag for publishinig bam file with all long reads mapped to a reference" + }, + "save_samtools_unmapped_bam": { + "type": "string", + "default": "false", + "description": "Flag for publishing bam for reads that did not map to the host reference" + }, + "save_minimap2_unmapped_fq": { + "type": "string", + "default": "false", + "description": "Flag for publishing fastq files for reads that did not map to the host reference" } } } From 4b2a3789cd7ba15ac41f32f6179bcc1665cb8e47 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 11:03:48 +0200 Subject: [PATCH 05/17] Update default values for host removal parameters --- nextflow.config | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/nextflow.config b/nextflow.config index 04273f1..95daba9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -81,18 +81,18 @@ params { save_runmerged_reads = false // Host Removal - perform_shortread_hostremoval = false - shortread_hostremoval_reference = null - shortread_hostremoval_index = null - longread_hostremoval_index = null - save_hostremoval_index = false - save_hostremoval_mapped = false - save_hostremoval_unmapped = false - save_minimap2_hostremoval_index = false - save_minimap2_hostremoval_mapped = false - save_minimap2_hostremoval_unmapped = false - save_samtools_unmapped_bam = false - save_minimap2_unmapped_fq = false + perform_shortread_hostremoval = false + shortread_hostremoval_reference = null + shortread_hostremoval_index = null + save_hostremoval_index = false + save_hostremoval_mapped = false + save_hostremoval_unmapped = false + longread_hostremoval_index = null + save_minimap2_hostremoval_index = false + save_minimap2_hostremoval_mapped = false + save_samtools_unmapped_bam = false + save_minimap2_unmapped_fq = false + // MALT run_malt = false From 17039ebc6b143870331c4d2f8976a0fe63de0736 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 11:07:23 +0200 Subject: [PATCH 06/17] Fix type to boolean for flags --- nextflow_schema.json | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index d3878c1..257f5cb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -431,27 +431,23 @@ }, "longread_hostremoval_index": { "type": "string", - "default": null + "default": "None" }, "save_minimap2_hostremoval_index": { - "type": "string", - "default": "false", + "type": "boolean", "description": "Flag for publishing minimap2 host removal index" }, "save_minimap2_hostremoval_mapped": { - "type": "string", - "default": "false", + "type": "boolean", "description": "Flag for publishinig bam file with all long reads mapped to a reference" }, "save_samtools_unmapped_bam": { - "type": "string", - "default": "false", + "type": "boolean", "description": "Flag for publishing bam for reads that did not map to the host reference" }, "save_minimap2_unmapped_fq": { - "type": "string", - "default": "false", + "type": "boolean", "description": "Flag for publishing fastq files for reads that did not map to the host reference" } } -} +} \ No newline at end of file From 58d4dec70be5ac5eb2356aac351593c27f77502a Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 11:09:54 +0200 Subject: [PATCH 07/17] Update samtools view --- modules.json | 2 +- modules/nf-core/modules/samtools/view/main.nf | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 071234d..b568d9c 100644 --- a/modules.json +++ b/modules.json @@ -73,7 +73,7 @@ "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "samtools/view": { - "git_sha": "12afb6b0faf3cabf769c9a2a7dd477e3f066eac0" + "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" }, "untar": { "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/modules/samtools/view/main.nf index 11cfb74..55194e8 100644 --- a/modules/nf-core/modules/samtools/view/main.nf +++ b/modules/nf-core/modules/samtools/view/main.nf @@ -41,4 +41,16 @@ process SAMTOOLS_VIEW { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.cram + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } From c833d4fd9ae037e297eda9dea69045715fc4a064 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 11:42:05 +0200 Subject: [PATCH 08/17] Fix issues found with prettier --- modules.json | 2 +- nextflow_schema.json | 45 +++++++------------------------------------- 2 files changed, 8 insertions(+), 39 deletions(-) diff --git a/modules.json b/modules.json index b568d9c..ce1fbc5 100644 --- a/modules.json +++ b/modules.json @@ -80,4 +80,4 @@ } } } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 257f5cb..1902e27 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -304,10 +294,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -348,10 +335,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ] + "enum": ["entropy", "dust"] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -404,14 +388,7 @@ "kaiju_taxon_name": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ] + "enum": ["phylum", "class", "order", "family", "genus", "species"] }, "run_diamond": { "type": "boolean" @@ -419,15 +396,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ] + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"] }, "longread_hostremoval_index": { "type": "string", @@ -450,4 +419,4 @@ "description": "Flag for publishing fastq files for reads that did not map to the host reference" } } -} \ No newline at end of file +} From f4abbe280ab6497a220fde116f606d588b49bf73 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Tue, 3 May 2022 12:56:49 +0200 Subject: [PATCH 09/17] Fix back to nf-core modules version --- .../nf-core/modules/samtools/bam2fq/main.nf | 2 +- nf-core/modules/samtools/bam2fq/main.nf | 56 ------------------- nf-core/modules/samtools/bam2fq/meta.yml | 55 ------------------ 3 files changed, 1 insertion(+), 112 deletions(-) delete mode 100644 nf-core/modules/samtools/bam2fq/main.nf delete mode 100644 nf-core/modules/samtools/bam2fq/meta.yml diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf index 0dd1da9..5d6aa79 100644 --- a/modules/nf-core/modules/samtools/bam2fq/main.nf +++ b/modules/nf-core/modules/samtools/bam2fq/main.nf @@ -45,7 +45,7 @@ process SAMTOOLS_BAM2FQ { bam2fq \\ $args \\ -@ $task.cpus \\ - $inputbam > ${prefix}_interleaved.fq.gz + $inputbam >${prefix}_interleaved.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nf-core/modules/samtools/bam2fq/main.nf b/nf-core/modules/samtools/bam2fq/main.nf deleted file mode 100644 index 5d6aa79..0000000 --- a/nf-core/modules/samtools/bam2fq/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process SAMTOOLS_BAM2FQ { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - tuple val(meta), path(inputbam) - val split - - output: - tuple val(meta), path("*.fq.gz"), emit: reads - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - if (split){ - """ - samtools \\ - bam2fq \\ - $args \\ - -@ $task.cpus \\ - -1 ${prefix}_1.fq.gz \\ - -2 ${prefix}_2.fq.gz \\ - -0 ${prefix}_other.fq.gz \\ - -s ${prefix}_singleton.fq.gz \\ - $inputbam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - } else { - """ - samtools \\ - bam2fq \\ - $args \\ - -@ $task.cpus \\ - $inputbam >${prefix}_interleaved.fq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - } -} diff --git a/nf-core/modules/samtools/bam2fq/meta.yml b/nf-core/modules/samtools/bam2fq/meta.yml deleted file mode 100644 index 319a60c..0000000 --- a/nf-core/modules/samtools/bam2fq/meta.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: samtools_bam2fq -description: | - The module uses bam2fq method from samtools to - convert a SAM, BAM or CRAM file to FASTQ format -keywords: - - bam2fq - - samtools - - fastq -tools: - - samtools: - description: Tools for dealing with SAM, BAM and CRAM files - homepage: None - documentation: http://www.htslib.org/doc/1.1/samtools.html - tool_dev_url: None - doi: "" - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - inputbam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - split: - type: boolean - description: | - TRUE/FALSE value to indicate if reads should be separated into - /1, /2 and if present other, or singleton. - Note: choosing TRUE will generate 4 different files. - Choosing FALSE will produce a single file, which will be interleaved in case - the input contains paired reads. - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads: - type: file - description: | - FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) - or a single interleaved .fq.gz file if the user chooses not to split the reads. - pattern: "*.fq.gz" - -authors: - - "@lescai" From 59c7f5a5b1b6a3e144a9b1c151e6f2a4c315342f Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Thu, 5 May 2022 08:38:41 +0200 Subject: [PATCH 10/17] Update samtools/bam2fq --- modules.json | 4 ++-- modules/nf-core/modules/samtools/bam2fq/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index ce1fbc5..043e4ad 100644 --- a/modules.json +++ b/modules.json @@ -70,7 +70,7 @@ "git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b" }, "samtools/bam2fq": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159" }, "samtools/view": { "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" @@ -80,4 +80,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf index 5d6aa79..9301d1d 100644 --- a/modules/nf-core/modules/samtools/bam2fq/main.nf +++ b/modules/nf-core/modules/samtools/bam2fq/main.nf @@ -45,7 +45,7 @@ process SAMTOOLS_BAM2FQ { bam2fq \\ $args \\ -@ $task.cpus \\ - $inputbam >${prefix}_interleaved.fq.gz + $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": From 5b7b21415633c3df64cb2d88734d5863b4162a9c Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Thu, 5 May 2022 08:40:53 +0200 Subject: [PATCH 11/17] Fix formatting with prettier --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 043e4ad..a55c88b 100644 --- a/modules.json +++ b/modules.json @@ -80,4 +80,4 @@ } } } -} \ No newline at end of file +} From d94534e8acee01575b589e88549ed64ab4fc4411 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Thu, 5 May 2022 09:07:33 +0200 Subject: [PATCH 12/17] Simplify params which control host removal publish --- conf/modules.config | 8 ++++---- nextflow.config | 6 +----- nextflow_schema.json | 16 ---------------- 3 files changed, 5 insertions(+), 25 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b707954..cd0fb04 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -169,7 +169,7 @@ process { publishDir = [ path: { "${params.outdir}/minimap2/index" }, mode: params.publish_dir_mode, - enabled: params.save_minimap2_hostremoval_index, + enabled: params.save_hostremoval_index, pattern: 'minimap2' ] } @@ -179,7 +179,7 @@ process { publishDir = [ path: { "${params.outdir}/minimap2/align" }, mode: params.publish_dir_mode, - enabled: params.save_minimap2_hostremoval_mapped, + enabled: params.save_hostremoval_mapped, pattern: '*.bam' ] } @@ -190,7 +190,7 @@ process { publishDir = [ path: { "${params.outdir}/samtools/view" }, mode: params.publish_dir_mode, - enabled: params.save_samtools_unmapped_bam, + enabled: params.save_hostremoval_unmapped, pattern: '*.bam' ] } @@ -200,7 +200,7 @@ process { publishDir = [ path: { "${params.outdir}/samtools/bam2fq" }, mode: params.publish_dir_mode, - enabled: params.save_minimap2_unmapped_fq, + enabled: params.save_hostremoval_unmapped, pattern: '*.fq.gz' ] } diff --git a/nextflow.config b/nextflow.config index 95daba9..8c99af2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -84,14 +84,10 @@ params { perform_shortread_hostremoval = false shortread_hostremoval_reference = null shortread_hostremoval_index = null + longread_hostremoval_index = null save_hostremoval_index = false save_hostremoval_mapped = false save_hostremoval_unmapped = false - longread_hostremoval_index = null - save_minimap2_hostremoval_index = false - save_minimap2_hostremoval_mapped = false - save_samtools_unmapped_bam = false - save_minimap2_unmapped_fq = false // MALT diff --git a/nextflow_schema.json b/nextflow_schema.json index 1902e27..9e4cc6c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -401,22 +401,6 @@ "longread_hostremoval_index": { "type": "string", "default": "None" - }, - "save_minimap2_hostremoval_index": { - "type": "boolean", - "description": "Flag for publishing minimap2 host removal index" - }, - "save_minimap2_hostremoval_mapped": { - "type": "boolean", - "description": "Flag for publishinig bam file with all long reads mapped to a reference" - }, - "save_samtools_unmapped_bam": { - "type": "boolean", - "description": "Flag for publishing bam for reads that did not map to the host reference" - }, - "save_minimap2_unmapped_fq": { - "type": "boolean", - "description": "Flag for publishing fastq files for reads that did not map to the host reference" } } } From 557d31dfd2fecbac2a415ddab780e6a3abc87168 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Thu, 5 May 2022 13:19:10 +0200 Subject: [PATCH 13/17] Add parameter for turning on longread host removal --- conf/test.config | 2 +- docs/usage.md | 2 +- nextflow.config | 2 +- nextflow_schema.json | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/conf/test.config b/conf/test.config index a2464b2..6d04f60 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { perform_longread_clip = false perform_shortread_complexityfilter = true perform_shortread_hostremoval = true - shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + perform_longread_hostremoval = true run_kaiju = true run_kraken2 = true run_malt = true diff --git a/docs/usage.md b/docs/usage.md index cee2bb6..537b94a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -191,7 +191,7 @@ You can optionally save the FASTQ output of the run merging with the `--save_com #### Host Removal -Removal of possible-host reads from FASTQ files prior profiling can be activated with `--perform_shortread_hostremoval` +Removal of possible-host reads from FASTQ files prior profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`. Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these prior typically resource-heavy profiling with more efficient methods. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases. diff --git a/nextflow.config b/nextflow.config index 8c99af2..4ac0c44 100644 --- a/nextflow.config +++ b/nextflow.config @@ -82,7 +82,7 @@ params { // Host Removal perform_shortread_hostremoval = false - shortread_hostremoval_reference = null + perform_longread_hostremoval = false shortread_hostremoval_index = null longread_hostremoval_index = null save_hostremoval_index = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 9e4cc6c..d2eee95 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -362,7 +362,9 @@ "perform_shortread_hostremoval": { "type": "boolean" }, - "shortread_hostremoval_reference": { + "perform_longread_hostremoval": { + "type": "boolean" + }, "type": "string", "default": "None" }, From 6618e8cac65fe43d40d4952863698801f9b4507b Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Thu, 5 May 2022 13:20:34 +0200 Subject: [PATCH 14/17] Change param to a more generic name --- conf/test.config | 1 + docs/usage.md | 2 +- nextflow.config | 1 + nextflow_schema.json | 1 + workflows/taxprofiler.nf | 8 ++++---- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/conf/test.config b/conf/test.config index 6d04f60..a5244f9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,6 +29,7 @@ params { perform_shortread_complexityfilter = true perform_shortread_hostremoval = true perform_longread_hostremoval = true + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_malt = true diff --git a/docs/usage.md b/docs/usage.md index 537b94a..1143da3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -197,7 +197,7 @@ Similarly to complexity filtering, host-removal can be useful for runtime optimi nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2, and the use of the unaligned reads for downstream profiling. -You can supply your reference genome in FASTA format with `--shortread_hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used. +You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used. > 💡 If you have multiple taxa or sequences you wish to remove (e.g., the host genome and then also PhiX - common quality-control reagent during sequencing) you can simply concatenate the FASTAs of each taxa or sequences into a single reference file. diff --git a/nextflow.config b/nextflow.config index 4ac0c44..ca9e280 100644 --- a/nextflow.config +++ b/nextflow.config @@ -83,6 +83,7 @@ params { // Host Removal perform_shortread_hostremoval = false perform_longread_hostremoval = false + hostremoval_reference = null shortread_hostremoval_index = null longread_hostremoval_index = null save_hostremoval_index = false diff --git a/nextflow_schema.json b/nextflow_schema.json index d2eee95..ab2108e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -365,6 +365,7 @@ "perform_longread_hostremoval": { "type": "boolean" }, + "hostremoval_reference": { "type": "string", "default": "None" }, diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 9eb53a3..81ea5d9 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -11,7 +11,7 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.databases, params.shortread_hostremoval_reference, +def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference, params.shortread_hostremoval_index, params.multiqc_config ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -22,10 +22,10 @@ if (params.databases) { ch_databases = file(params.databases) } else { exit 1, ' if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" -if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." } -if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." } +if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." } +if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." } -if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } +if (params.hostremoval_reference ) { ch_reference = file(params.hostremoval_reference) } if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] } if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] } From e3e55f57c26ba462ceb6795be55b312e615ca550 Mon Sep 17 00:00:00 2001 From: ljmesi <37740329+ljmesi@users.noreply.github.com> Date: Thu, 5 May 2022 14:02:24 +0200 Subject: [PATCH 15/17] Fix alignment Just made this change to see if the CI tests would fail again or if the failure was just a one of thing. --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 81ea5d9..1f2e9d1 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -47,7 +47,7 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' From b0faeedc60c654fe445909ceed569ee010c84837 Mon Sep 17 00:00:00 2001 From: Lauri Mesilaakso Date: Fri, 6 May 2022 08:20:31 +0200 Subject: [PATCH 16/17] Update workflows/taxprofiler.nf Co-authored-by: James A. Fellows Yates --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 1f2e9d1..7a6cd09 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -25,7 +25,7 @@ if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_me if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." } if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." } -if (params.hostremoval_reference ) { ch_reference = file(params.hostremoval_reference) } +if (params.hostremoval_reference ) { ch_reference = file(params.hostremoval_reference) } if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] } if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] } From 02517733aa2227d1bc6f4decdb607918de047b2d Mon Sep 17 00:00:00 2001 From: Lauri Mesilaakso Date: Fri, 6 May 2022 10:40:59 +0200 Subject: [PATCH 17/17] Update docs/usage.md Co-authored-by: James A. Fellows Yates --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 1143da3..4aa1d09 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -197,7 +197,7 @@ Similarly to complexity filtering, host-removal can be useful for runtime optimi nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2, and the use of the unaligned reads for downstream profiling. -You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used. +You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index` or `--longread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used. > 💡 If you have multiple taxa or sequences you wish to remove (e.g., the host genome and then also PhiX - common quality-control reagent during sequencing) you can simply concatenate the FASTAs of each taxa or sequences into a single reference file.