From 1bd6dbfe58b16d3115ff214295dd6835a6a8f4d5 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Fri, 28 Oct 2022 11:40:50 +0200 Subject: [PATCH 1/3] Add samtools stats for long-reads --- assets/multiqc_config.yml | 1 + modules.json | 8 ++++ modules/nf-core/samtools/index/main.nf | 48 ++++++++++++++++++++ modules/nf-core/samtools/index/meta.yml | 53 ++++++++++++++++++++++ modules/nf-core/samtools/stats/main.nf | 49 ++++++++++++++++++++ modules/nf-core/samtools/stats/meta.yml | 53 ++++++++++++++++++++++ subworkflows/local/longread_hostremoval.nf | 15 ++++++ workflows/taxprofiler.nf | 4 ++ 8 files changed, 231 insertions(+) create mode 100644 modules/nf-core/samtools/index/main.nf create mode 100644 modules/nf-core/samtools/index/meta.yml create mode 100644 modules/nf-core/samtools/stats/main.nf create mode 100644 modules/nf-core/samtools/stats/meta.yml diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e2b5a6e..e4a04a9 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -21,6 +21,7 @@ run_modules: - adapterRemoval - fastp - bowtie2 + - samtools - kraken - malt - custom_content diff --git a/modules.json b/modules.json index bcf2567..be3d193 100644 --- a/modules.json +++ b/modules.json @@ -153,6 +153,14 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" }, + "samtools/index": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/stats": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, "samtools/view": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 0000000..e04e63e --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..e5cadbc --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 0000000..9b0c386 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 0000000..cac50b1 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,53 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" diff --git a/subworkflows/local/longread_hostremoval.nf b/subworkflows/local/longread_hostremoval.nf index 82ea8ca..fb79e12 100644 --- a/subworkflows/local/longread_hostremoval.nf +++ b/subworkflows/local/longread_hostremoval.nf @@ -6,6 +6,8 @@ include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/inde include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' workflow LONGREAD_HOSTREMOVAL { take: @@ -39,9 +41,22 @@ workflow LONGREAD_HOSTREMOVAL { SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false ) ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() ) + SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() ) + + SAMTOOLS_VIEW.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .set { bam_bai } + + SAMTOOLS_STATS ( bam_bai, reference ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats ) + emit: + stats = SAMTOOLS_STATS.out.stats //channel: [val(meta), [reads ] ] reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index b456b06..07660ab 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -281,6 +281,10 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) } + if (params.perform_longread_hostremoval) { + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) + } + ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) ) if ( params.run_profile_standardisation ) { From f87131c7b1c212c3169b0ee3d6462eaa72d2a021 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Fri, 28 Oct 2022 14:10:24 +0200 Subject: [PATCH 2/3] Save samtools stats output --- conf/modules.config | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index a4d36fd..397e0c2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -233,6 +233,16 @@ process { ] } + withName: SAMTOOLS_STATS { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/samtools/stats" }, + mode: params.publish_dir_mode, + enabled: params.save_hostremoval_unmapped, + pattern: '*stats' + ] + } + withName: BBMAP_BBDUK { ext.args = [ "entropy=${params.shortread_complexityfilter_entropy}", From f1778acf35a6b5e9e00601441e982acf401bdaec Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Mon, 31 Oct 2022 09:31:13 +0100 Subject: [PATCH 3/3] Apply review suggestion --- subworkflows/local/longread_hostremoval.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/longread_hostremoval.nf b/subworkflows/local/longread_hostremoval.nf index fb79e12..5f3f746 100644 --- a/subworkflows/local/longread_hostremoval.nf +++ b/subworkflows/local/longread_hostremoval.nf @@ -44,9 +44,8 @@ workflow LONGREAD_HOSTREMOVAL { SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() ) - SAMTOOLS_VIEW.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .set { bam_bai } + bam_bai = SAMTOOLS_VIEW.out.bam + .join(SAMTOOLS_INDEX.out.bai, remainder: true) SAMTOOLS_STATS ( bam_bai, reference ) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())