1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 04:42:04 +00:00

Remove flagstat as bowtie2 reports this itself

This commit is contained in:
James Fellows Yates 2022-04-03 17:23:14 +02:00
parent 482112bb42
commit 066ceb2bca
9 changed files with 72 additions and 116 deletions

View file

@ -132,7 +132,6 @@ process {
]
}
withName: PORECHOP {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
@ -142,6 +141,24 @@ process {
]
}
// Settings applied to the BOWTIE2_BUILD process: output prefix and where its results are published.
withName: BOWTIE2_BUILD {
// Prefix is "<meta.id>_<meta.run_accession>", evaluated lazily per task.
// NOTE(review): assumes `meta` is available to BOWTIE2_BUILD tasks - confirm against the module's inputs.
ext.prefix = { "${meta.id}_${meta.run_accession}" }
// Copy files matching *.bt2 into <outdir>/bowtie2/build.
publishDir = [
path: { "${params.outdir}/bowtie2/build" },
mode: 'copy',
pattern: '*.bt2'
]
}
// Settings applied to the BOWTIE2_ALIGN process: output prefix and where its results are published.
withName: BOWTIE2_ALIGN {
// Prefix is "<meta.id>_<meta.run_accession>", evaluated lazily per task.
ext.prefix = { "${meta.id}_${meta.run_accession}" }
// Copy files ending in fastq.gz or bam into <outdir>/bowtie2/align.
publishDir = [
path: { "${params.outdir}/bowtie2/align" },
mode: 'copy',
pattern: '*.{fastq.gz,bam}'
]
}
withName: CAT_FASTQ {
publishDir = [
path: { "${params.outdir}/prepared_sequences" },

View file

@ -22,10 +22,12 @@ params {
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
run_kraken2 = true
run_malt = true
shortread_clipmerge = true
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
run_kraken2 = true
run_malt = true
shortread_clipmerge = true
shortread_hostremoval = true
shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
}

View file

@ -36,9 +36,6 @@
"porechop": {
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
},
"samtools/flagstat": {
"git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35"
},
"untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
}

View file

@ -1,34 +0,0 @@
// Runs `samtools flagstat` on an alignment file, writing the report to "<bam>.flagstat"
// and the samtools version to versions.yml.
process SAMTOOLS_FLAGSTAT {
tag "$meta.id"
label 'process_low'
// Software environment: the bioconda samtools 1.15 package when params.enable_conda is set;
// otherwise the Galaxy depot Singularity image when the container engine is singularity and
// task.ext.singularity_pull_docker_container is not set, else the quay.io biocontainers image.
conda (params.enable_conda ? "bioconda::samtools=1.15" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' :
'quay.io/biocontainers/samtools:1.15--h1170115_1' }"
input:
// meta: sample information map; bam: alignment file; bai: index for that file.
// `bai` is declared as an input but is not referenced in the script below.
tuple val(meta), path(bam), path(bai)
output:
// flagstat: the "*.flagstat" report paired with meta; versions: the versions.yml file.
tuple val(meta), path("*.flagstat"), emit: flagstat
path "versions.yml" , emit: versions
when:
// Task runs unless task.ext.when is defined and evaluates to false.
task.ext.when == null || task.ext.when
script:
// NOTE(review): `args` is read from task.ext.args but never interpolated into the command below.
def args = task.ext.args ?: ''
// The command passes (task.cpus - 1) to --threads and redirects stdout to "<bam>.flagstat".
"""
samtools \\
flagstat \\
--threads ${task.cpus-1} \\
$bam \\
> ${bam}.flagstat
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

View file

@ -1,49 +0,0 @@
name: samtools_flagstat
description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type
keywords:
- stats
- mapping
- counts
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- bai:
type: file
description: Index for BAM/CRAM/SAM file
pattern: "*.{bai,crai,sai}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- flagstat:
type: file
description: File containing samtools flagstat output
pattern: "*.{flagstat}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"

View file

@ -66,6 +66,7 @@ params {
longread_clip = false
// Host Removal
shortread_hostremoval = false
shortread_hostremoval_reference = null
shortread_hostremoval_index = null

View file

@ -266,8 +266,7 @@
"type": "boolean"
},
"shortread_clipmerge_excludeunmerged": {
"type": "boolean",
"default": false
"type": "boolean"
},
"longread_clip": {
"type": "boolean"
@ -304,6 +303,17 @@
"shortread_clipmerge_minlength": {
"type": "integer",
"default": 15
},
"shortread_hostremoval": {
"type": "boolean"
},
"shortread_hostremoval_reference": {
"type": "string",
"default": null
},
"shortread_hostremoval_index": {
"type": "string",
"default": null
}
}
}

View file

@ -2,38 +2,33 @@
// Remove host reads via alignment and export off-target reads
//
include { BOWTIE2_ALIGN } from '../../../modules/nf-core/modules/bowtie2/align/main'
include { BOWTIE2_BUILD } from '../../../modules/nf-core/modules/bowtie2/build/main'
include { SAMTOOLS_VIEW } from '../../../modules/nf-core/modules/samtools/view/main'
include { SAMTOOLS_FASTQ } from '../../../modules/nf-core/modules/samtools/fastq/main'
include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/modules/samtools/flagstat/main'
include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main'
workflow SHORTREAD_PREPROCESSING {
workflow SHORTREAD_HOSTREMOVAL {
take:
reads // [ [ meta ], [ reads ] ]
reference // /path/to/fasta
index // /path/to/index
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( !params.shortread_hostremoval_index ) {
file( , checkIfExists: true )
BOWTIE2_BUILD ( reference )
ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions )
ch_bowtie2_index = BOWTIE2_BUILD ( reference ).index
ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions )
} else {
ch_bowtie2_index = index.first()
}
BOWTIE2_ALIGN ( reads, BOWTIE2_BUILD.out.index )
ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.log )
SAMTOOLS_FLAGSTAT ( BOWTIE2_ALIGN.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.flagstat )
BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true )
ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log )
emit:
reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -23,8 +23,11 @@ if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-cor
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
// TODO Add check if index but no reference exit 1
if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { }
if (params.shortread_hostremoval_index) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] }
// Host-removal parameter validation.
// A reference FASTA is mandatory when host removal is requested.
if (params.shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
// A pre-built index without its reference FASTA is rejected. The check reads
// params.shortread_hostremoval_index, the parameter name declared in the pipeline config and schema.
if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }

// Resolve the optional reference FASTA and index to file objects; fall back to an empty list when unset.
if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { ch_reference = [] }
if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] }
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -49,6 +52,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { DB_CHECK } from '../subworkflows/local/db_check'
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -120,17 +124,24 @@ workflow TAXPROFILER {
if ( params.longread_clip ) {
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
.map { it -> [ it[0], [it[1]] ] }
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
} else {
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
} else {
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
}
if ( params.shortread_hostremoval ) {
ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_preprocessed, ch_reference, ch_reference_index ).reads
ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions.first())
} else {
ch_shortreads_hostremoved = ch_shortreads_preprocessed
}
/*
COMBINE READS WITH POSSIBLE DATABASES
*/
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
ch_input_for_profiling = ch_shortreads_preprocessed
ch_input_for_profiling = ch_shortreads_hostremoved
.mix( ch_longreads_preprocessed )
.combine(DB_CHECK.out.dbs)
.branch {
@ -196,9 +207,15 @@ workflow TAXPROFILER {
if (params.shortread_clipmerge) {
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc)
}
if (params.shortread_hostremoval) {
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
}
if (params.longread_clip) {
ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc)
}
if (params.run_kraken2) {
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))
ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())