mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-10 22:03:09 +00:00
Merge pull request #274 from sofstam/samtools_fastq
Substitute samtools/bam2fq with samtools/fastq
This commit is contained in:
commit
255b492b44
10 changed files with 122 additions and 126 deletions
|
@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
- [#271](https://github.com/nf-core/taxprofiler/pull/271/files) Improved standardised table generation documentation and mOTUs manual database download tutorial (♥ to @prototaxites for reporting, fix by @jfy133)
|
- [#271](https://github.com/nf-core/taxprofiler/pull/271/files) Improved standardised table generation documentation and mOTUs manual database download tutorial (♥ to @prototaxites for reporting, fix by @jfy133)
|
||||||
- [#269](https://github.com/nf-core/taxprofiler/pull/269/files) Reduced output files in AWS full test output due to very large files
|
- [#269](https://github.com/nf-core/taxprofiler/pull/269/files) Reduced output files in AWS full test output due to very large files
|
||||||
- [#270](https://github.com/nf-core/taxprofiler/pull/270/files) Fixed warning for host removal index parameter, and improved index checks (♥ to @prototaxites for reporting, fix by @jfy133)
|
- [#270](https://github.com/nf-core/taxprofiler/pull/270/files) Fixed warning for host removal index parameter, and improved index checks (♥ to @prototaxites for reporting, fix by @jfy133)
|
||||||
|
- [#274](https://github.com/nf-core/taxprofiler/pull/274/files) Substituted the samtools/bam2fq module with samtools/fastq module (fix by @sofstam)
|
||||||
|
|
||||||
### `Dependencies`
|
### `Dependencies`
|
||||||
|
|
||||||
|
|
|
@ -250,12 +250,12 @@ process {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}.unmapped" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}.unmapped" }
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: SAMTOOLS_BAM2FQ {
|
withName: SAMTOOLS_FASTQ {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}.unmapped" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}.unmapped" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/samtools/bam2fq" },
|
path: { "${params.outdir}/samtools/fastq" },
|
||||||
mode: params.publish_dir_mode,
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.fq.gz',
|
pattern: '*.fastq.gz',
|
||||||
enabled: params.save_hostremoval_unmapped
|
enabled: params.save_hostremoval_unmapped
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
|
||||||
- [Bowtie2](#bowtie2) - Host removal for Illumina reads
|
- [Bowtie2](#bowtie2) - Host removal for Illumina reads
|
||||||
- [minimap2](#minimap2) - Host removal for Nanopore reads
|
- [minimap2](#minimap2) - Host removal for Nanopore reads
|
||||||
- [SAMtools stats](#samtools-stats) - Statistics from host removal
|
- [SAMtools stats](#samtools-stats) - Statistics from host removal
|
||||||
- [SAMtools bam2fq](#samtools-bam2fq) - Converts unmapped BAM file to fastq format (minimap2 only)
|
- [SAMtools fastq](#samtools-fastq) - Converts unmapped BAM file to fastq format (minimap2 only)
|
||||||
- [Bracken](#bracken) - Taxonomic classifier using k-mers and abundance estimations
|
- [Bracken](#bracken) - Taxonomic classifier using k-mers and abundance estimations
|
||||||
- [Kraken2](#kraken2) - Taxonomic classifier using exact k-mer matches
|
- [Kraken2](#kraken2) - Taxonomic classifier using exact k-mer matches
|
||||||
- [KrakenUniq](#krakenuniq) - Taxonomic classifier that combines the k-mer-based classification and the number of unique k-mers found in each species
|
- [KrakenUniq](#krakenuniq) - Taxonomic classifier that combines the k-mer-based classification and the number of unique k-mers found in each species
|
||||||
|
@ -201,7 +201,7 @@ It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) and/
|
||||||
|
|
||||||
By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify to save `--save_hostremoval_unmapped` - these contain only unmapped reads.
|
By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify to save `--save_hostremoval_unmapped` - these contain only unmapped reads.
|
||||||
|
|
||||||
> ℹ️ Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/bam2fq/`](#samtools-bam2fq)
|
> ℹ️ Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/fastq/`](#samtools-fastq)
|
||||||
|
|
||||||
> ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as run merging etc.
|
> ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as run merging etc.
|
||||||
|
|
||||||
|
@ -228,11 +228,11 @@ By default, nf-core/taxprofiler will only provide the `.bam` file containing map
|
||||||
|
|
||||||
> ℹ️ minimap2 is not yet supported as a module in MultiQC and therefore there is no dedicated section in the MultiQC HTML. Rather, alignment statistics to the host genome are reported via the samtools stats module in the MultiQC report.
|
> ℹ️ minimap2 is not yet supported as a module in MultiQC and therefore there is no dedicated section in the MultiQC HTML. Rather, alignment statistics to the host genome are reported via the samtools stats module in the MultiQC report.
|
||||||
|
|
||||||
> ℹ️ Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See [`samtools/bam2fq`](#samtools-bam2fq)
|
> ℹ️ Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See [`samtools/fastq`](#samtools-fastq)
|
||||||
|
|
||||||
### SAMtools bam2fq
|
### SAMtools fastq
|
||||||
|
|
||||||
[SAMtools bam2fq](http://www.htslib.org/doc/1.1/samtools.html) converts a `.sam`, `.bam`, or `.cram` alignment file to FASTQ format
|
[SAMtools fastq](http://www.htslib.org/doc/1.1/samtools.html) converts a `.sam`, `.bam`, or `.cram` alignment file to FASTQ format
|
||||||
|
|
||||||
<details markdown="1">
|
<details markdown="1">
|
||||||
<summary>Output files</summary>
|
<summary>Output files</summary>
|
||||||
|
|
|
@ -187,9 +187,9 @@
|
||||||
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
|
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
|
||||||
"installed_by": ["modules"]
|
"installed_by": ["modules"]
|
||||||
},
|
},
|
||||||
"samtools/bam2fq": {
|
"samtools/fastq": {
|
||||||
"branch": "master",
|
"branch": "master",
|
||||||
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
|
"git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
|
||||||
"installed_by": ["modules"]
|
"installed_by": ["modules"]
|
||||||
},
|
},
|
||||||
"samtools/index": {
|
"samtools/index": {
|
||||||
|
|
56
modules/nf-core/samtools/bam2fq/main.nf
generated
56
modules/nf-core/samtools/bam2fq/main.nf
generated
|
@ -1,56 +0,0 @@
|
||||||
process SAMTOOLS_BAM2FQ {
|
|
||||||
tag "$meta.id"
|
|
||||||
label 'process_low'
|
|
||||||
|
|
||||||
conda "bioconda::samtools=1.16.1"
|
|
||||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
|
||||||
'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' :
|
|
||||||
'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }"
|
|
||||||
|
|
||||||
input:
|
|
||||||
tuple val(meta), path(inputbam)
|
|
||||||
val split
|
|
||||||
|
|
||||||
output:
|
|
||||||
tuple val(meta), path("*.fq.gz"), emit: reads
|
|
||||||
path "versions.yml" , emit: versions
|
|
||||||
|
|
||||||
when:
|
|
||||||
task.ext.when == null || task.ext.when
|
|
||||||
|
|
||||||
script:
|
|
||||||
def args = task.ext.args ?: ''
|
|
||||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
|
||||||
|
|
||||||
if (split){
|
|
||||||
"""
|
|
||||||
samtools \\
|
|
||||||
bam2fq \\
|
|
||||||
$args \\
|
|
||||||
-@ $task.cpus \\
|
|
||||||
-1 ${prefix}_1.fq.gz \\
|
|
||||||
-2 ${prefix}_2.fq.gz \\
|
|
||||||
-0 ${prefix}_other.fq.gz \\
|
|
||||||
-s ${prefix}_singleton.fq.gz \\
|
|
||||||
$inputbam
|
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
|
||||||
"${task.process}":
|
|
||||||
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
|
|
||||||
END_VERSIONS
|
|
||||||
"""
|
|
||||||
} else {
|
|
||||||
"""
|
|
||||||
samtools \\
|
|
||||||
bam2fq \\
|
|
||||||
$args \\
|
|
||||||
-@ $task.cpus \\
|
|
||||||
$inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz
|
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
|
||||||
"${task.process}":
|
|
||||||
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
|
|
||||||
END_VERSIONS
|
|
||||||
"""
|
|
||||||
}
|
|
||||||
}
|
|
55
modules/nf-core/samtools/bam2fq/meta.yml
generated
55
modules/nf-core/samtools/bam2fq/meta.yml
generated
|
@ -1,55 +0,0 @@
|
||||||
name: samtools_bam2fq
|
|
||||||
description: |
|
|
||||||
The module uses bam2fq method from samtools to
|
|
||||||
convert a SAM, BAM or CRAM file to FASTQ format
|
|
||||||
keywords:
|
|
||||||
- bam2fq
|
|
||||||
- samtools
|
|
||||||
- fastq
|
|
||||||
tools:
|
|
||||||
- samtools:
|
|
||||||
description: Tools for dealing with SAM, BAM and CRAM files
|
|
||||||
homepage: None
|
|
||||||
documentation: http://www.htslib.org/doc/1.1/samtools.html
|
|
||||||
tool_dev_url: None
|
|
||||||
doi: ""
|
|
||||||
licence: ["MIT"]
|
|
||||||
|
|
||||||
input:
|
|
||||||
- meta:
|
|
||||||
type: map
|
|
||||||
description: |
|
|
||||||
Groovy Map containing sample information
|
|
||||||
e.g. [ id:'test', single_end:false ]
|
|
||||||
- inputbam:
|
|
||||||
type: file
|
|
||||||
description: BAM/CRAM/SAM file
|
|
||||||
pattern: "*.{bam,cram,sam}"
|
|
||||||
- split:
|
|
||||||
type: boolean
|
|
||||||
description: |
|
|
||||||
TRUE/FALSE value to indicate if reads should be separated into
|
|
||||||
/1, /2 and if present other, or singleton.
|
|
||||||
Note: choosing TRUE will generate 4 different files.
|
|
||||||
Choosing FALSE will produce a single file, which will be interleaved in case
|
|
||||||
the input contains paired reads.
|
|
||||||
|
|
||||||
output:
|
|
||||||
- meta:
|
|
||||||
type: map
|
|
||||||
description: |
|
|
||||||
Groovy Map containing sample information
|
|
||||||
e.g. [ id:'test', single_end:false ]
|
|
||||||
- versions:
|
|
||||||
type: file
|
|
||||||
description: File containing software versions
|
|
||||||
pattern: "versions.yml"
|
|
||||||
- reads:
|
|
||||||
type: file
|
|
||||||
description: |
|
|
||||||
FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
|
|
||||||
or a single interleaved .fq.gz file if the user chooses not to split the reads.
|
|
||||||
pattern: "*.fq.gz"
|
|
||||||
|
|
||||||
authors:
|
|
||||||
- "@lescai"
|
|
44
modules/nf-core/samtools/fastq/main.nf
generated
Normal file
44
modules/nf-core/samtools/fastq/main.nf
generated
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
// Converts a SAM/BAM/CRAM file to gzipped FASTQ with `samtools fastq`.
//
// Inputs:
//   meta        - Groovy map with sample information; `meta.id` and
//                 `meta.single_end` are read by this process.
//   input       - the alignment file to convert.
//   interleave  - when true AND the sample is not single-end, READ1/READ2
//                 records are written to one `*_interleaved.fastq.gz` file.
//
// Outputs (all optional except versions):
//   fastq       - `*_{1,2}.fastq.gz`
//   interleaved - `*_interleaved.fastq.gz`
//   singleton   - `*_singleton.fastq.gz`
//   other       - `*_other.fastq.gz` (always requested through `-0`)
//   versions    - `versions.yml` recording the samtools version
process SAMTOOLS_FASTQ {
    tag "$meta.id"
    label 'process_low'

    conda "bioconda::samtools=1.16.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' :
        'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }"

    input:
    tuple val(meta), path(input)
    val(interleave)

    output:
    tuple val(meta), path("*_{1,2}.fastq.gz")      , optional:true, emit: fastq
    tuple val(meta), path("*_interleaved.fastq.gz"), optional:true, emit: interleaved
    tuple val(meta), path("*_singleton.fastq.gz")  , optional:true, emit: singleton
    tuple val(meta), path("*_other.fastq.gz")      , optional:true, emit: other
    path  "versions.yml"                           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Pick the output flags for the three supported layouts.
    // The interleaved case uses `-o FILE` rather than a `> FILE` shell redirect:
    // a redirect captures samtools' plain-text stdout, which would leave an
    // uncompressed stream behind a `.fastq.gz` name. Per the samtools manual,
    // files named with a `.gz` suffix are compressed by samtools itself.
    def output = ( interleave && ! meta.single_end ) ? "-o ${prefix}_interleaved.fastq.gz" :
        meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" :
            "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz"
    """
    samtools \\
        fastq \\
        $args \\
        --threads ${task.cpus-1} \\
        -0 ${prefix}_other.fastq.gz \\
        $input \\
        $output

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
|
62
modules/nf-core/samtools/fastq/meta.yml
generated
Normal file
62
modules/nf-core/samtools/fastq/meta.yml
generated
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
name: samtools_fastq
|
||||||
|
description: Converts a SAM/BAM/CRAM file to FASTQ
|
||||||
|
keywords:
|
||||||
|
- bam
|
||||||
|
- sam
|
||||||
|
- cram
|
||||||
|
- fastq
|
||||||
|
tools:
|
||||||
|
- samtools:
|
||||||
|
description: |
|
||||||
|
SAMtools is a set of utilities for interacting with and post-processing
|
||||||
|
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
|
||||||
|
These files are generated as output by short read aligners like BWA.
|
||||||
|
homepage: http://www.htslib.org/
|
||||||
|
documentation: http://www.htslib.org/doc/samtools.html
|
||||||
|
doi: 10.1093/bioinformatics/btp352
|
||||||
|
licence: ["MIT"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- input:
|
||||||
|
type: file
|
||||||
|
description: BAM/CRAM/SAM file
|
||||||
|
pattern: "*.{bam,cram,sam}"
|
||||||
|
- interleave:
|
||||||
|
type: boolean
|
||||||
|
description: Set true for interleaved fastq file
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- fastq:
|
||||||
|
type: file
|
||||||
|
description: Compressed FASTQ file(s) with reads with either the READ1 or READ2 flag set in separate files.
|
||||||
|
pattern: "*_{1,2}.fastq.gz"
|
||||||
|
- interleaved:
|
||||||
|
type: file
|
||||||
|
description: Compressed FASTQ file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file.
|
||||||
|
pattern: "*_interleaved.fastq.gz"
|
||||||
|
- singleton:
|
||||||
|
type: file
|
||||||
|
description: Compressed FASTQ file with singleton reads
|
||||||
|
pattern: "*_singleton.fastq.gz"
|
||||||
|
- other:
|
||||||
|
type: file
|
||||||
|
description: Compressed FASTQ file with reads with either both READ1 and READ2 flags set or unset
|
||||||
|
pattern: "*_other.fastq.gz"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@priyanka-surana"
|
||||||
|
- "@suzannejin"
|
|
@ -304,7 +304,7 @@
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"fa_icon": "fas fa-save",
|
"fa_icon": "fas fa-save",
|
||||||
"description": "Save reads from samples that went through the host-removal step",
|
"description": "Save reads from samples that went through the host-removal step",
|
||||||
"help_text": "Save only the reads NOT mapped to the reference genome in FASTQ format (as exported from `samtools view` and `bam2fq`).\n\nThis can be useful if you wish to perform other analyses on the off-target reads from the host mapping, such as manual profiling or _de novo_ assembly."
|
"help_text": "Save only the reads NOT mapped to the reference genome in FASTQ format (as exported from `samtools view` and `fastq`).\n\nThis can be useful if you wish to perform other analyses on the off-target reads from the host mapping, such as manual profiling or _de novo_ assembly."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"fa_icon": "fas fa-user-times"
|
"fa_icon": "fas fa-user-times"
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main'
|
include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main'
|
||||||
include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
|
include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
|
||||||
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
|
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
|
||||||
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main'
|
include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
|
||||||
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
|
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
|
||||||
include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
|
include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
|
||||||
|
|
||||||
|
@ -38,8 +38,8 @@ workflow LONGREAD_HOSTREMOVAL {
|
||||||
SAMTOOLS_VIEW ( ch_minimap2_mapped , [], [] )
|
SAMTOOLS_VIEW ( ch_minimap2_mapped , [], [] )
|
||||||
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )
|
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )
|
||||||
|
|
||||||
SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false )
|
SAMTOOLS_FASTQ ( SAMTOOLS_VIEW.out.bam, false )
|
||||||
ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() )
|
ch_versions = ch_versions.mix( SAMTOOLS_FASTQ.out.versions.first() )
|
||||||
|
|
||||||
// Indexing whole BAM for host removal statistics
|
// Indexing whole BAM for host removal statistics
|
||||||
SAMTOOLS_INDEX ( MINIMAP2_ALIGN.out.bam )
|
SAMTOOLS_INDEX ( MINIMAP2_ALIGN.out.bam )
|
||||||
|
@ -54,7 +54,7 @@ workflow LONGREAD_HOSTREMOVAL {
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
stats = SAMTOOLS_STATS.out.stats //channel: [val(meta), [reads ] ]
|
stats = SAMTOOLS_STATS.out.stats //channel: [val(meta), [reads ] ]
|
||||||
reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
|
reads = SAMTOOLS_FASTQ.out.fastq.mix( SAMTOOLS_FASTQ.out.other) // channel: [ val(meta), [ reads ] ]
|
||||||
versions = ch_versions // channel: [ versions.yml ]
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
mqc = ch_multiqc_files
|
mqc = ch_multiqc_files
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue