From 5b99d9024aeafcad5256d534d08c7cd7a2b79d70 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Fri, 28 Oct 2022 14:24:33 +0200 Subject: [PATCH 1/9] Add samtools stats for short-reads --- assets/multiqc_config.yml | 1 + conf/modules.config | 12 +++++ modules.json | 8 ++++ modules/nf-core/samtools/index/main.nf | 48 +++++++++++++++++++ modules/nf-core/samtools/index/meta.yml | 53 +++++++++++++++++++++ modules/nf-core/samtools/stats/main.nf | 49 +++++++++++++++++++ modules/nf-core/samtools/stats/meta.yml | 53 +++++++++++++++++++++ subworkflows/local/shortread_hostremoval.nf | 12 +++++ 8 files changed, 236 insertions(+) create mode 100644 modules/nf-core/samtools/index/main.nf create mode 100644 modules/nf-core/samtools/index/meta.yml create mode 100644 modules/nf-core/samtools/stats/main.nf create mode 100644 modules/nf-core/samtools/stats/meta.yml diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e2b5a6e..e4a04a9 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -21,6 +21,7 @@ run_modules: - adapterRemoval - fastp - bowtie2 + - samtools - kraken - malt - custom_content diff --git a/conf/modules.config b/conf/modules.config index a4d36fd..476ca11 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -233,6 +233,18 @@ process { ] } + withName: SAMTOOLS_STATS { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/samtools/stats" }, + mode: params.publish_dir_mode, + enabled: params.save_hostremoval_unmapped, + pattern: '*stats' + ] + } + + + withName: BBMAP_BBDUK { ext.args = [ "entropy=${params.shortread_complexityfilter_entropy}", diff --git a/modules.json b/modules.json index bcf2567..be3d193 100644 --- a/modules.json +++ b/modules.json @@ -153,6 +153,14 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" }, + "samtools/index": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/stats": { + 
"branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, "samtools/view": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 0000000..e04e63e --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..e5cadbc --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + 
SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 0000000..9b0c386 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 0000000..cac50b1 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,53 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf index d181a34..a1ed758 100644 --- a/subworkflows/local/shortread_hostremoval.nf +++ b/subworkflows/local/shortread_hostremoval.nf @@ -4,6 +4,8 @@ include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' workflow SHORTREAD_HOSTREMOVAL { take: @@ -26,7 +28,17 @@ workflow SHORTREAD_HOSTREMOVAL { ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() ) ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log ) + SAMTOOLS_INDEX ( BOWTIE2_ALIGN.out.bam ) + + bam_bai = BOWTIE2_ALIGN.out.bam + .join(SAMTOOLS_INDEX.out.bai, remainder: true) + + SAMTOOLS_STATS ( bam_bai, reference ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats ) + emit: + stats = SAMTOOLS_STATS.out.stats reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files From 
c4a7d094c8b5327eb6e007540c473fbbc5b94e75 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Fri, 28 Oct 2022 14:33:44 +0200 Subject: [PATCH 2/9] Fix EditorConfig check --- conf/modules.config | 2 -- 1 file changed, 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 476ca11..065f017 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -243,8 +243,6 @@ process { ] } - - withName: BBMAP_BBDUK { ext.args = [ "entropy=${params.shortread_complexityfilter_entropy}", From ce611d6efdbde1b6d5fbbd4a9df663d5510458d5 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Mon, 31 Oct 2022 10:04:51 +0100 Subject: [PATCH 3/9] Add bowtie2 mapped reads --- subworkflows/local/shortread_hostremoval.nf | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf index a1ed758..426442b 100644 --- a/subworkflows/local/shortread_hostremoval.nf +++ b/subworkflows/local/shortread_hostremoval.nf @@ -4,8 +4,9 @@ include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' workflow SHORTREAD_HOSTREMOVAL { take: @@ -28,9 +29,19 @@ workflow SHORTREAD_HOSTREMOVAL { ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() ) ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log ) - SAMTOOLS_INDEX ( BOWTIE2_ALIGN.out.bam ) + ch_bowtie2_mapped = BOWTIE2_ALIGN.out.bam + .map { + meta, reads -> + [ meta, reads, [] ] + } - bam_bai = BOWTIE2_ALIGN.out.bam + SAMTOOLS_VIEW ( 
ch_bowtie2_mapped, [], [] ) + ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() ) + + SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() ) + + bam_bai = SAMTOOLS_VIEW.out.bam .join(SAMTOOLS_INDEX.out.bai, remainder: true) SAMTOOLS_STATS ( bam_bai, reference ) @@ -38,7 +49,7 @@ workflow SHORTREAD_HOSTREMOVAL { ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats ) emit: - stats = SAMTOOLS_STATS.out.stats + stats = SAMTOOLS_STATS.out.stats reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files From 1d1047374bf4618b0ffb50bc56e2531fb8697aa9 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Mon, 31 Oct 2022 12:09:25 +0100 Subject: [PATCH 4/9] Fix EditorConfig format --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 065f017..a3efaf1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -222,7 +222,7 @@ process { pattern: '*.bam' ] } - + withName: SAMTOOLS_BAM2FQ { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ From a4b5fa9f5750820069c90a621640a23a27823d22 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Mon, 31 Oct 2022 12:17:49 +0100 Subject: [PATCH 5/9] Fix trailing space --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index a3efaf1..c2ed5c1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -233,7 +233,7 @@ process { ] } - withName: SAMTOOLS_STATS { + withName: SAMTOOLS_STATS { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ path: { "${params.outdir}/samtools/stats" }, From 0e0c55d3b0092a12be6853bcb070fa0de95230a5 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli 
<91951607+sofstam@users.noreply.github.com> Date: Mon, 31 Oct 2022 12:35:51 +0100 Subject: [PATCH 6/9] Update modules.config --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index c2ed5c1..397e0c2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -222,7 +222,7 @@ process { pattern: '*.bam' ] } - + withName: SAMTOOLS_BAM2FQ { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ From b5e6639f9d01d2a56ac0e72715eb617b36c57812 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 3 Nov 2022 14:12:05 +0100 Subject: [PATCH 7/9] Apply review suggestions --- conf/modules.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 397e0c2..96bb658 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,7 +238,6 @@ process { publishDir = [ path: { "${params.outdir}/samtools/stats" }, mode: params.publish_dir_mode, - enabled: params.save_hostremoval_unmapped, pattern: '*stats' ] } From 79dd3930bb0b714c99da55cb1c320c05295518f1 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 3 Nov 2022 16:52:01 +0100 Subject: [PATCH 8/9] Add samtools to README --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3f0a840..94ede1e 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,10 @@ On release, automated continuous integration tests run the pipeline on a full-si 2. 
Performs optional read pre-processing - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop)) - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong)) - - Host read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2)) + - Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2)) - Run merging -3. Performs taxonomic profiling using one or more of: +3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/)) +4. Performs taxonomic profiling using one or more of: - [Kraken2](https://ccb.jhu.edu/software/kraken2/) - [MetaPhlAn3](https://huttenhower.sph.harvard.edu/metaphlan/) - [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/) @@ -45,10 +46,10 @@ On release, automated continuous integration tests run the pipeline on a full-si - [Kaiju](https://kaiju.binf.ku.dk/) - [mOTUs](https://motu-tool.org/) - [KrakenUniq](https://github.com/fbreitwieser/krakenuniq) -4. Perform optional post-processing with: +5. Perform optional post-processing with: - [bracken](https://ccb.jhu.edu/software/bracken/) -5. Standardises output tables -6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +6. Standardises output tables +7. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) 7. 
Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html)) ## Quick Start From 696edfb866a1d196241ea819e7207edd8cd067e8 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Fri, 4 Nov 2022 12:05:23 +0100 Subject: [PATCH 9/9] Apply prettier --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 94ede1e..5a8884e 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - [bracken](https://ccb.jhu.edu/software/bracken/) 6. Standardises output tables 7. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) -7. Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html)) +8. Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html)) ## Quick Start