1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 17:59:55 +00:00

Merge pull request #162 from genomic-medicine-sweden/short_reads_samtools

Add samtools stats for short-reads
This commit is contained in:
James A. Fellows Yates 2022-11-04 14:39:29 +01:00 committed by GitHub
commit 67e264b7a9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 30 additions and 7 deletions

View file

@ -34,9 +34,10 @@ On release, automated continuous integration tests run the pipeline on a full-si
2. Performs optional read pre-processing
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
- Host read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Run merging
3. Performs taxonomic profiling using one or more of:
3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/))
4. Performs taxonomic profiling using one or more of:
- [Kraken2](https://ccb.jhu.edu/software/kraken2/)
- [MetaPhlAn3](https://huttenhower.sph.harvard.edu/metaphlan/)
- [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)
@ -45,11 +46,11 @@ On release, automated continuous integration tests run the pipeline on a full-si
- [Kaiju](https://kaiju.binf.ku.dk/)
- [mOTUs](https://motu-tool.org/)
- [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)
4. Perform optional post-processing with:
5. Performs optional post-processing with:
- [bracken](https://ccb.jhu.edu/software/bracken/)
5. Standardises output tables
6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
7. Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))
6. Standardises output tables
7. Presents QC for raw reads ([`MultiQC`](http://multiqc.info/))
8. Plots Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))
## Quick Start

View file

@ -236,7 +236,6 @@ process {
publishDir = [
path: { "${params.outdir}/samtools/stats" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_unmapped,
pattern: '*stats'
]
}

View file

@ -4,6 +4,9 @@
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
workflow SHORTREAD_HOSTREMOVAL {
take:
@ -26,7 +29,27 @@ workflow SHORTREAD_HOSTREMOVAL {
ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log )
ch_bowtie2_mapped = BOWTIE2_ALIGN.out.bam
.map {
meta, reads ->
[ meta, reads, [] ]
}
SAMTOOLS_VIEW ( ch_bowtie2_mapped, [], [] )
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )
SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() )
bam_bai = SAMTOOLS_VIEW.out.bam
.join(SAMTOOLS_INDEX.out.bai, remainder: true)
SAMTOOLS_STATS ( bam_bai, reference )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats )
emit:
stats = SAMTOOLS_STATS.out.stats
reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files