1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-14 17:13:10 +00:00

Add samtools stats for long-reads

This commit is contained in:
Sofia Stamouli 2022-10-28 11:40:50 +02:00
parent bf89525bc2
commit 1bd6dbfe58
8 changed files with 231 additions and 0 deletions

View file

@ -21,6 +21,7 @@ run_modules:
- adapterRemoval - adapterRemoval
- fastp - fastp
- bowtie2 - bowtie2
- samtools
- kraken - kraken
- malt - malt
- custom_content - custom_content

View file

@ -153,6 +153,14 @@
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
}, },
"samtools/index": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"samtools/stats": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"samtools/view": { "samtools/view": {
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"

48
modules/nf-core/samtools/index/main.nf generated Normal file
View file

@ -0,0 +1,48 @@
process SAMTOOLS_INDEX {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(input)
output:
tuple val(meta), path("*.bai") , optional:true, emit: bai
tuple val(meta), path("*.csi") , optional:true, emit: csi
tuple val(meta), path("*.crai"), optional:true, emit: crai
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
"""
samtools \\
index \\
-@ ${task.cpus-1} \\
$args \\
$input
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
stub:
"""
touch ${input}.bai
touch ${input}.crai
touch ${input}.csi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

53
modules/nf-core/samtools/index/meta.yml generated Normal file
View file

@ -0,0 +1,53 @@
name: samtools_index
description: Index SAM/BAM/CRAM file
keywords:
- index
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: hhttp://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bai:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- crai:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- csi:
type: file
description: CSI index file
pattern: "*.{csi}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@ewels"
- "@maxulysse"

49
modules/nf-core/samtools/stats/main.nf generated Normal file
View file

@ -0,0 +1,49 @@
process SAMTOOLS_STATS {
tag "$meta.id"
label 'process_single'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(input), path(input_index)
path fasta
output:
tuple val(meta), path("*.stats"), emit: stats
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def reference = fasta ? "--reference ${fasta}" : ""
"""
samtools \\
stats \\
--threads ${task.cpus} \\
${reference} \\
${input} \\
> ${prefix}.stats
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.stats
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

53
modules/nf-core/samtools/stats/meta.yml generated Normal file
View file

@ -0,0 +1,53 @@
name: samtools_stats
description: Produces comprehensive statistics from SAM/BAM/CRAM file
keywords:
- statistics
- counts
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: hhttp://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- stats:
type: file
description: File containing samtools stats output
pattern: "*.{stats}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@FriederikeHanssen"

View file

@ -6,6 +6,8 @@ include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/inde
include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main' include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
workflow LONGREAD_HOSTREMOVAL { workflow LONGREAD_HOSTREMOVAL {
take: take:
@ -39,9 +41,22 @@ workflow LONGREAD_HOSTREMOVAL {
SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false ) SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false )
ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() ) ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() )
SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() )
SAMTOOLS_VIEW.out.bam
.join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true)
.set { bam_bai }
SAMTOOLS_STATS ( bam_bai, reference )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats )
emit: emit:
stats = SAMTOOLS_STATS.out.stats //channel: [val(meta), [reads ] ]
reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ] versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
} }

View file

@ -281,6 +281,10 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
} }
if (params.perform_longread_hostremoval) {
ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
}
ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) )
if ( params.run_profile_standardisation ) { if ( params.run_profile_standardisation ) {