1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-12-22 08:58:16 +00:00

Subsitute samtools/bam2fq with samtools/fastq

This commit is contained in:
sofstam 2023-03-23 15:30:07 +01:00
parent 58795264e4
commit 102d53bd75
6 changed files with 250 additions and 53 deletions

View file

@ -21,7 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Bowtie2](#bowtie2) - Host removal for Illumina reads
- [minimap2](#minimap2) - Host removal for Nanopore reads
- [SAMtools stats](#samtools-stats) - Statistics from host removal
- [SAMtools bam2fq](#samtools-bam2fq) - Converts unmapped BAM file to fastq format (minimap2 only)
- [SAMtools fastq](#samtools-fastq) - Converts unmapped BAM file to fastq format (minimap2 only)
- [Bracken](#bracken) - Taxonomic classifier using k-mers and abundance estimations
- [Kraken2](#kraken2) - Taxonomic classifier using exact k-mer matches
- [KrakenUniq](#krakenuniq) - Taxonomic classifier that combines the k-mer-based classification and the number of unique k-mers found in each species
@ -201,7 +201,7 @@ It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) and/
By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify to save `--save_hostremoval_unmapped` - these contain only unmapped reads.
> Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/bam2fq/`](#samtools-bam2fq)
> Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/fastq/`](#samtools-fastq)
> ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as run merging etc..
@ -228,11 +228,11 @@ By default, nf-core/taxprofiler will only provide the `.bam` file containing map
> minimap2 is not yet supported as a module in MultiQC and therefore there is no dedicated section in the MultiQC HTML. Rather, alignment statistics to host genome is reported via samtools stats module in MultiQC report.
> Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See [`samtools/bam2fq`](#samtools-bam2fq)
> Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See [`samtools/fastq`](#samtools-fastq)
### SAMtools bam2fq
### SAMtools fastq
[SAMtools bam2fq](http://www.htslib.org/doc/1.1/samtools.html) converts a `.sam`, `.bam`, or `.cram` alignment file to FASTQ format
[SAMtools fastq](http://www.htslib.org/doc/1.1/samtools.html) converts a `.sam`, `.bam`, or `.cram` alignment file to FASTQ format
<details markdown="1">
<summary>Output files</summary>

View file

@ -8,217 +8,308 @@
"adapterremoval": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bbmap/bbduk": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bowtie2/align": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bowtie2/build": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bracken/bracken": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bracken/combinebrackenoutputs": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"cat/fastq": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"centrifuge/centrifuge": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"centrifuge/kreport": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"diamond/blastx": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"falco": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"],
"installed_by": [
"modules"
],
"patch": "modules/nf-core/falco/falco.diff"
},
"fastp": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"fastqc": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"filtlong": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"gunzip": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kaiju/kaiju": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kaiju/kaiju2krona": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kaiju/kaiju2table": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kraken2/kraken2": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krakentools/combinekreports": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krakentools/kreport2krona": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krakenuniq/preloadedkrakenuniq": {
"branch": "master",
"git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krona/ktimporttaxonomy": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krona/ktimporttext": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"malt/run": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"megan/rma2info": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"metaphlan3/mergemetaphlantables": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"metaphlan3/metaphlan3": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"minimap2/align": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"minimap2/index": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"motus/merge": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"motus/profile": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"multiqc": {
"branch": "master",
"git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"porechop/porechop": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"],
"installed_by": [
"modules"
],
"patch": "modules/nf-core/porechop/porechop/porechop-porechop.diff"
},
"prinseqplusplus": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/bam2fq": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/fastq": {
"branch": "master",
"git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
"installed_by": [
"modules"
]
},
"samtools/index": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/stats": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/view": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"taxpasta/merge": {
"branch": "master",
"git_sha": "ffa9641ee18f88aff974257cb50ba3cf8f7d143c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"untar": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
}
}
}
}
}
}
}

44
modules/nf-core/samtools/fastq/main.nf generated Normal file
View file

@ -0,0 +1,44 @@
process SAMTOOLS_FASTQ {
tag "$meta.id"
label 'process_low'
conda "bioconda::samtools=1.16.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' :
'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }"
input:
tuple val(meta), path(input)
val(interleave)
output:
tuple val(meta), path("*_{1,2}.fastq.gz") , optional:true, emit: fastq
tuple val(meta), path("*_interleaved.fastq.gz"), optional:true, emit: interleaved
tuple val(meta), path("*_singleton.fastq.gz") , optional:true, emit: singleton
tuple val(meta), path("*_other.fastq.gz") , optional:true, emit: other
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq.gz" :
meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" :
"-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz"
"""
samtools \\
fastq \\
$args \\
--threads ${task.cpus-1} \\
-0 ${prefix}_other.fastq.gz \\
$input \\
$output
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

62
modules/nf-core/samtools/fastq/meta.yml generated Normal file
View file

@ -0,0 +1,62 @@
name: samtools_fastq
description: Converts a SAM/BAM/CRAM file to FASTQ
keywords:
- bam
- sam
- cram
- fastq
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- interleave:
type: boolean
description: Set true for interleaved fastq file
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- fastq:
type: file
description: Compressed FASTQ file(s) with reads with either the READ1 or READ2 flag set in separate files.
pattern: "*_{1,2}.fastq.gz"
- interleaved:
type: file
description: Compressed FASTQ file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file.
pattern: "*_interleaved.fastq.gz"
- singleton:
type: file
description: Compressed FASTQ file with singleton reads
pattern: "*_singleton.fastq.gz"
- other:
type: file
description: Compressed FASTQ file with reads with either both READ1 and READ2 flags set or unset
pattern: "*_other.fastq.gz"
authors:
- "@priyanka-surana"
- "@suzannejin"

View file

@ -304,7 +304,7 @@
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Save reads from samples that went through the host-removal step",
"help_text": "Save only the reads NOT mapped to the reference genome in FASTQ format (as exported from `samtools view` and `bam2fq`).\n\nThis can be useful if you wish to perform other analyses on the off-target reads from the host mapping, such as manual profiling or _de novo_ assembly."
"help_text": "Save only the reads NOT mapped to the reference genome in FASTQ format (as exported from `samtools view` and `fastq`).\n\nThis can be useful if you wish to perform other analyses on the off-target reads from the host mapping, such as manual profiling or _de novo_ assembly."
}
},
"fa_icon": "fas fa-user-times"

View file

@ -5,7 +5,7 @@
include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main'
include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main'
include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
@ -38,8 +38,8 @@ workflow LONGREAD_HOSTREMOVAL {
SAMTOOLS_VIEW ( ch_minimap2_mapped , [], [] )
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )
SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false )
ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() )
SAMTOOLS_FASTQ ( SAMTOOLS_VIEW.out.bam, false )
ch_versions = ch_versions.mix( SAMTOOLS_FASTQ.out.versions.first() )
// Indexing whole BAM for host removal statistics
SAMTOOLS_INDEX ( MINIMAP2_ALIGN.out.bam )
@ -54,7 +54,7 @@ workflow LONGREAD_HOSTREMOVAL {
emit:
stats = SAMTOOLS_STATS.out.stats //channel: [val(meta), [reads ] ]
reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
reads = SAMTOOLS_FASTQ.out.fastq // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}