From 066ceb2bcaeb21c0a2a07fd1a5359e30277869ce Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 3 Apr 2022 17:23:14 +0200 Subject: [PATCH] Remove flagstat as bowtie2 reports this itself --- conf/modules.config | 19 ++++++- conf/test.config | 12 +++-- modules.json | 3 -- .../nf-core/modules/samtools/flagstat/main.nf | 34 ------------- .../modules/samtools/flagstat/meta.yml | 49 ------------------- nextflow.config | 1 + nextflow_schema.json | 14 +++++- subworkflows/local/shortread_hostremoval.nf | 29 +++++------ workflows/taxprofiler.nf | 27 ++++++++-- 9 files changed, 72 insertions(+), 116 deletions(-) delete mode 100644 modules/nf-core/modules/samtools/flagstat/main.nf delete mode 100644 modules/nf-core/modules/samtools/flagstat/meta.yml diff --git a/conf/modules.config b/conf/modules.config index dc8b138..41faa62 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -132,7 +132,6 @@ process { ] } - withName: PORECHOP { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -142,6 +141,24 @@ process { ] } + withName: BOWTIE2_BUILD { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/build" }, + mode: 'copy', + pattern: '*.bt2' + ] + } + + withName: BOWTIE2_ALIGN { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/align" }, + mode: 'copy', + pattern: '*.{fastq.gz,bam}' + ] + } + withName: CAT_FASTQ { publishDir = [ path: { "${params.outdir}/prepared_sequences" }, diff --git a/conf/test.config b/conf/test.config index 92a10e4..90ea241 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,10 +22,12 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' - run_kraken2 = true - run_malt = true - shortread_clipmerge = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' + run_kraken2 = true + run_malt = true + shortread_clipmerge = true + shortread_hostremoval = true + shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' } diff --git a/modules.json b/modules.json index 7c3facc..7395d68 100644 --- a/modules.json +++ b/modules.json @@ -36,9 +36,6 @@ "porechop": { "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" }, - "samtools/flagstat": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" - }, "untar": { "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" } diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf deleted file mode 100644 index 9e3440a..0000000 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process SAMTOOLS_FLAGSTAT { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools \\ - flagstat \\ - --threads ${task.cpus-1} \\ - $bam \\ - > ${bam}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/modules/samtools/flagstat/meta.yml deleted file mode 100644 index 9526906..0000000 --- a/modules/nf-core/modules/samtools/flagstat/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type -keywords: - - stats - - mapping - - counts - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 8aa8722..e559a85 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,6 +66,7 @@ params { longread_clip = false // Host Removal + shortread_hostremoval = false shortread_hostremoval_reference = null shortread_hostremoval_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index fb2ca31..0b5162b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -266,8 +266,7 @@ "type": "boolean" }, "shortread_clipmerge_excludeunmerged": { - "type": "boolean", - "default": false + "type": "boolean" }, "longread_clip": { "type": "boolean" @@ -304,6 +303,17 @@ "shortread_clipmerge_minlength": { "type": "integer", "default": 15 + }, + "shortread_hostremoval": { + "type": "boolean" + }, + "shortread_hostremoval_reference": { + "type": "string", + "default": null + }, + "shortread_hostremoval_index": { + "type": "string", + "default": null } } } diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf index 4b0861c..505f989 100644 --- a/subworkflows/local/shortread_hostremoval.nf +++ b/subworkflows/local/shortread_hostremoval.nf @@ -2,38 +2,33 @@ // Remove host reads via alignment and export off-target reads // -include { BOWTIE2_ALIGN } from '../../../modules/nf-core/modules/bowtie2/align/main' -include { BOWTIE2_BUILD } from '../../../modules/nf-core/modules/bowtie2/build/main' -include { SAMTOOLS_VIEW } from '../../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_FASTQ } from '../../../modules/nf-core/modules/samtools/fastq/main' -include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/modules/samtools/flagstat/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' -workflow SHORTREAD_PREPROCESSING { +workflow SHORTREAD_HOSTREMOVAL { take: reads // [ [ meta ], [ reads ] ] reference // /path/to/fasta + index // /path/to/index main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() if ( !params.shortread_hostremoval_index ) { - file( , checkIfExists: true ) - BOWTIE2_BUILD ( reference ) - ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + ch_bowtie2_index = BOWTIE2_BUILD ( reference ).index + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + } else { + ch_bowtie2_index = index.first() } - BOWTIE2_ALIGN ( reads, BOWTIE2_BUILD.out.index ) - ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.log ) - - SAMTOOLS_FLAGSTAT ( BOWTIE2_ALIGN.out.bam ) - ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.flagstat ) + BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true ) + ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log ) emit: reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index c5678f1..631aee6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -23,8 +23,11 @@ if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-cor if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" // TODO Add check if index but no reference exit 1 -if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { } -if (params.shortread_hostremoval_index) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } +if (params.shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." } +if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." } + +if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { ch_reference = [] } +if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -49,6 +52,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -120,17 +124,24 @@ workflow TAXPROFILER { if ( params.longread_clip ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } - ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) - } else { + ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) + } else {SHORTREAD_HOSTREMOVAL ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } + if ( params.shortread_hostremoval ) { + ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_preprocessed, ch_reference, ch_reference_index ).reads + ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions.first()) + } else { + ch_shortreads_hostremoved = ch_shortreads_preprocessed + } + /* COMBINE READS WITH POSSIBLE DATABASES */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = ch_shortreads_preprocessed + ch_input_for_profiling = ch_shortreads_hostremoved .mix( ch_longreads_preprocessed ) .combine(DB_CHECK.out.dbs) .branch { @@ -196,9 +207,15 @@ workflow TAXPROFILER { if (params.shortread_clipmerge) { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc) } + + if (params.shortread_hostremoval) { + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) + } + if (params.longread_clip) { ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) } + if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())