diff --git a/docs/output.md b/docs/output.md index 4ef6770..4240b25 100644 --- a/docs/output.md +++ b/docs/output.md @@ -21,7 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Bowtie2](#bowtie2) - Host removal for Illumina reads - [minimap2](#minimap2) - Host removal for Nanopore reads - [SAMtools stats](#samtools-stats) - Statistics from host removal -- [SAMtools bam2fq](#samtools-bam2fq) - Converts unmapped BAM file to fastq format (minimap2 only) +- [SAMtools fastq](#samtools-fastq) - Converts unmapped BAM file to fastq format (minimap2 only) - [Bracken](#bracken) - Taxonomic classifier using k-mers and abundance estimations - [Kraken2](#kraken2) - Taxonomic classifier using exact k-mer matches - [KrakenUniq](#krakenuniq) - Taxonomic classifier that combines the k-mer-based classification and the number of unique k-mers found in each species @@ -201,7 +201,7 @@ It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) and/ By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify to save `--save_hostremoval_unmapped` - these contain only unmapped reads. -> ℹ️ Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/bam2fq/`](#samtools-bam2fq) +> ℹ️ Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/fastq/`](#samtools-fastq) > ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as run merging etc.. @@ -228,11 +228,11 @@ By default, nf-core/taxprofiler will only provide the `.bam` file containing map > ℹ️ minimap2 is not yet supported as a module in MultiQC and therefore there is no dedicated section in the MultiQC HTML. Rather, alignment statistics to host genome is reported via samtools stats module in MultiQC report. -> ℹ️ Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See [`samtools/bam2fq`](#samtools-bam2fq) +> ℹ️ Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See [`samtools/fastq`](#samtools-fastq) -### SAMtools bam2fq +### SAMtools fastq -[SAMtools bam2fq](http://www.htslib.org/doc/1.1/samtools.html) converts a `.sam`, `.bam`, or `.cram` alignment file to FASTQ format +[SAMtools fastq](http://www.htslib.org/doc/1.1/samtools.html) converts a `.sam`, `.bam`, or `.cram` alignment file to FASTQ format
Output files diff --git a/modules.json b/modules.json index 1a5cd64..b61b140 100644 --- a/modules.json +++ b/modules.json @@ -8,217 +8,308 @@ "adapterremoval": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bbmap/bbduk": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bowtie2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bracken/bracken": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bracken/combinebrackenoutputs": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "centrifuge/centrifuge": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "centrifuge/kreport": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "diamond/blastx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "falco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/falco/falco.diff" }, "fastp": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "filtlong": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kaiju/kaiju": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kaiju/kaiju2krona": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kaiju/kaiju2table": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kraken2/kraken2": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krakentools/combinekreports": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krakentools/kreport2krona": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krakenuniq/preloadedkrakenuniq": { "branch": "master", "git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttext": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "malt/run": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "megan/rma2info": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "metaphlan3/mergemetaphlantables": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "metaphlan3/metaphlan3": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "motus/merge": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "motus/profile": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "porechop/porechop": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/porechop/porechop/porechop-porechop.diff" }, "prinseqplusplus": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/bam2fq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "samtools/fastq": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "taxpasta/merge": { "branch": "master", "git_sha": "ffa9641ee18f88aff974257cb50ba3cf8f7d143c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf new file mode 100644 index 0000000..c0b36f6 --- /dev/null +++ b/modules/nf-core/samtools/fastq/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FASTQ { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(input) + val(interleave) + + output: + tuple val(meta), path("*_{1,2}.fastq.gz") , optional:true, emit: fastq + tuple val(meta), path("*_interleaved.fastq.gz"), optional:true, emit: interleaved + tuple val(meta), path("*_singleton.fastq.gz") , optional:true, emit: singleton + tuple val(meta), path("*_other.fastq.gz") , optional:true, emit: other + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq.gz" : + meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : + "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" + """ + samtools \\ + fastq \\ + $args \\ + --threads ${task.cpus-1} \\ + -0 ${prefix}_other.fastq.gz \\ + $input \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/fastq/meta.yml b/modules/nf-core/samtools/fastq/meta.yml new file mode 100644 index 0000000..b1a1ed3 --- /dev/null +++ b/modules/nf-core/samtools/fastq/meta.yml @@ -0,0 +1,62 @@ +name: samtools_fastq +description: Converts a SAM/BAM/CRAM file to FASTQ +keywords: + - bam + - sam + - cram + - fastq +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - interleave: + type: boolean + description: Set true for interleaved fastq file + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Compressed FASTQ file(s) with reads with either the READ1 or READ2 flag set in separate files. + pattern: "*_{1,2}.fastq.gz" + - interleaved: + type: file + description: Compressed FASTQ file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file. + pattern: "*_interleaved.fastq.gz" + - singleton: + type: file + description: Compressed FASTQ file with singleton reads + pattern: "*_singleton.fastq.gz" + - other: + type: file + description: Compressed FASTQ file with reads with either both READ1 and READ2 flags set or unset + pattern: "*_other.fastq.gz" + +authors: + - "@priyanka-surana" + - "@suzannejin" diff --git a/nextflow_schema.json b/nextflow_schema.json index 1941893..943d436 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -304,7 +304,7 @@ "type": "boolean", "fa_icon": "fas fa-save", "description": "Save reads from samples that went through the host-removal step", - "help_text": "Save only the reads NOT mapped to the reference genome in FASTQ format (as exported from `samtools view` and `bam2fq`).\n\nThis can be useful if you wish to perform other analyses on the off-target reads from the host mapping, such as manual profiling or _de novo_ assembly." + "help_text": "Save only the reads NOT mapped to the reference genome in FASTQ format (as exported from `samtools view` and `fastq`).\n\nThis can be useful if you wish to perform other analyses on the off-target reads from the host mapping, such as manual profiling or _de novo_ assembly." } }, "fa_icon": "fas fa-user-times" diff --git a/subworkflows/local/longread_hostremoval.nf b/subworkflows/local/longread_hostremoval.nf index 83660fe..7242bbd 100644 --- a/subworkflows/local/longread_hostremoval.nf +++ b/subworkflows/local/longread_hostremoval.nf @@ -5,7 +5,7 @@ include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main' include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' -include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main' +include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' @@ -38,8 +38,8 @@ workflow LONGREAD_HOSTREMOVAL { SAMTOOLS_VIEW ( ch_minimap2_mapped , [], [] ) ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() ) - SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false ) - ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() ) + SAMTOOLS_FASTQ ( SAMTOOLS_VIEW.out.bam, false ) + ch_versions = ch_versions.mix( SAMTOOLS_FASTQ.out.versions.first() ) // Indexing whole BAM for host removal statistics SAMTOOLS_INDEX ( MINIMAP2_ALIGN.out.bam ) @@ -54,7 +54,7 @@ workflow LONGREAD_HOSTREMOVAL { emit: stats = SAMTOOLS_STATS.out.stats //channel: [val(meta), [reads ] ] - reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] + reads = SAMTOOLS_FASTQ.out.fastq // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files }