1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 23:13:09 +00:00

Merge pull request #77 from genomic-medicine-sweden/add_nanopore_host_reads_removal_with_minimap2

Add nanopore host reads removal with minimap2
This commit is contained in:
Lauri Mesilaakso 2022-05-06 12:29:17 +03:00 committed by GitHub
commit e52a40cc9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 540 additions and 20 deletions

View file

@ -164,6 +164,47 @@ process {
] ]
} }
withName: MINIMAP2_INDEX {
    // Build the host-genome index with minimap2's Oxford Nanopore preset.
    ext.args = '-x map-ont'
    publishDir = [
        path: { "${params.outdir}/minimap2/index" },
        mode: params.publish_dir_mode,
        enabled: params.save_hostremoval_index,
        // MINIMAP2_INDEX emits `*.mmi`; the former 'minimap2' glob matched none
        // of the task outputs, so --save_hostremoval_index published nothing.
        pattern: '*.mmi'
    ]
}
withName: MINIMAP2_ALIGN {
    // Name outputs per sample *and* sequencing run.
    ext.prefix = { "${meta.id}_${meta.run_accession}" }

    // Alignments are only published when the user asks to keep them.
    publishDir = [
        enabled: params.save_hostremoval_mapped,
        path: { "${params.outdir}/minimap2/align" },
        pattern: '*.bam',
        mode: params.publish_dir_mode
    ]
}
withName: SAMTOOLS_VIEW {
    // -f 4 : keep only reads flagged as unmapped, i.e. the non-host reads.
    // -b   : emit BAM. The module redirects stdout into a file named after the
    //        input's extension (`.bam`); without -b that file would contain
    //        header-less SAM text despite its name.
    ext.args = '-b -f 4'
    // Carry the run accession like MINIMAP2_ALIGN and SAMTOOLS_BAM2FQ do, so that
    // several runs of one sample do not publish to the same file name. The suffix
    // describes the retained (unmapped) reads and keeps the output name different
    // from the input BAM, which the module requires.
    ext.prefix = { "${meta.id}_${meta.run_accession}.unmapped.sorted" }
    publishDir = [
        path: { "${params.outdir}/samtools/view" },
        mode: params.publish_dir_mode,
        enabled: params.save_hostremoval_unmapped,
        pattern: '*.bam'
    ]
}
withName: SAMTOOLS_BAM2FQ {
    // Name outputs per sample *and* sequencing run.
    ext.prefix = { "${meta.id}_${meta.run_accession}" }

    // Host-depleted FASTQ files are only published on request.
    publishDir = [
        enabled: params.save_hostremoval_unmapped,
        path: { "${params.outdir}/samtools/bam2fq" },
        pattern: '*.fq.gz',
        mode: params.publish_dir_mode
    ]
}
withName: BBMAP_BBDUK { withName: BBMAP_BBDUK {
ext.args = [ ext.args = [
"entropy=${params.shortread_complexityfilter_entropy}", "entropy=${params.shortread_complexityfilter_entropy}",

View file

@ -28,7 +28,8 @@ params {
perform_longread_clip = false perform_longread_clip = false
perform_shortread_complexityfilter = true perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true perform_shortread_hostremoval = true
shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' perform_longread_hostremoval = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true run_kaiju = true
run_kraken2 = true run_kraken2 = true
run_malt = true run_malt = true

View file

@ -191,13 +191,13 @@ You can optionally save the FASTQ output of the run merging with the `--save_com
#### Host Removal #### Host Removal
Removal of possible-host reads from FASTQ files prior to profiling can be activated with `--perform_shortread_hostremoval` Removal of possible-host reads from FASTQ files prior to profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`.
Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these prior typically resource-heavy profiling with more efficient methods. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases. Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these prior typically resource-heavy profiling with more efficient methods. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases.
nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2, and the use of the unaligned reads for downstream profiling. nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2, and the use of the unaligned reads for downstream profiling.
You can supply your reference genome in FASTA format with `--shortread_hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used. You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index` or `--longread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used.
> 💡 If you have multiple taxa or sequences you wish to remove (e.g., the host genome and then also PhiX - common quality-control reagent during sequencing) you can simply concatenate the FASTAs of each taxa or sequences into a single reference file. > 💡 If you have multiple taxa or sequences you wish to remove (e.g., the host genome and then also PhiX - common quality-control reagent during sequencing) you can simply concatenate the FASTAs of each taxa or sequences into a single reference file.

View file

@ -52,6 +52,12 @@
"git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece" "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
}, },
"metaphlan3": { "metaphlan3": {
"git_sha": "ed4dd1a928ebf4308efb720de878045f7773f8e2"
},
"minimap2/align": {
"git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b"
},
"minimap2/index": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
}, },
"multiqc": { "multiqc": {
@ -63,6 +69,12 @@
"prinseqplusplus": { "prinseqplusplus": {
"git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b" "git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b"
}, },
"samtools/bam2fq": {
"git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159"
},
"samtools/view": {
"git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce"
},
"untar": { "untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
} }

View file

@ -23,7 +23,7 @@ process METAPHLAN3 {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def input_type = ("$input".endsWith(".fastq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam" def input_type = ("$input".endsWith(".fastq.gz") || "$input".endsWith(".fq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input" def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt" def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"

View file

@ -0,0 +1,48 @@
// Align reads against a reference (FASTA or minimap2 index) with minimap2.
//
// Inputs:
//   meta, reads       sample metadata map and its read file(s)
//   reference         reference passed positionally to minimap2
//   bam_format        true  -> SAM output is piped through samtools into a sorted `${prefix}.bam`
//                     false -> minimap2 writes `${prefix}.paf`
//   cigar_paf_format  adds `-c` (only honoured for PAF output)
//   cigar_bam         adds `-L` (only honoured for BAM output)
// Outputs: optional `paf` / `bam` channels (exactly one is produced per task) and `versions`.
process MINIMAP2_ALIGN {
    tag "$meta.id"
    label 'process_medium'

    // samtools is part of the environment because the BAM branch pipes into it.
    conda (params.enable_conda ? 'bioconda::minimap2=2.21 bioconda::samtools=1.12' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' :
        'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }"

    input:
    tuple val(meta), path(reads)
    path reference
    val bam_format
    val cigar_paf_format
    val cigar_bam

    output:
    tuple val(meta), path("*.paf"), optional: true, emit: paf
    tuple val(meta), path("*.bam"), optional: true, emit: bam
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def input_reads = meta.single_end ? "$reads" : "${reads[0]} ${reads[1]}"
    // The output fragment is appended last so that, in BAM mode, the pipe follows all minimap2 arguments.
    def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
    def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
    def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
    // versions.yml now also records samtools, which the BAM branch executes.
    """
    minimap2 \\
        $args \\
        -t $task.cpus \\
        $reference \\
        $input_reads \\
        $cigar_paf \\
        $set_cigar_bam \\
        $bam_output

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        minimap2: \$(minimap2 --version 2>&1)
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,65 @@
name: minimap2_align
description: A versatile pairwise aligner for genomic and spliced nucleotide sequences
keywords:
- align
- fasta
- fastq
- genome
- paf
- reference
tools:
- minimap2:
description: |
A versatile pairwise aligner for genomic and spliced nucleotide sequences.
homepage: https://github.com/lh3/minimap2
documentation: https://github.com/lh3/minimap2#uguide
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FASTA or FASTQ files of size 1 and 2 for single-end
and paired-end data, respectively.
- reference:
type: file
description: |
Reference database in FASTA format.
- bam_format:
type: boolean
description: Specify that output should be in BAM format
- cigar_paf_format:
type: boolean
description: Specify that output CIGAR should be in PAF format
- cigar_bam:
type: boolean
description: |
Write CIGAR with >65535 ops at the CG tag. This is recommended when
writing BAM output for ultra-long reads, whose CIGAR strings can exceed that limit (https://github.com/lh3/minimap2#working-with-65535-cigar-operations)
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- paf:
type: file
description: Alignment in PAF format
pattern: "*.paf"
- bam:
type: file
description: Alignment in BAM format
pattern: "*.bam"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@heuermh"
- "@sofstam"
- "@sateeshperi"
- "@jfy133"

View file

@ -0,0 +1,33 @@
// Build a minimap2 index (`.mmi`) from a reference FASTA.
//
// Input:  fasta - reference sequence file
// Output: index    - `<fasta basename>.mmi`
//         versions - versions.yml recording the minimap2 version
process MINIMAP2_INDEX {
label 'process_medium'
conda (params.enable_conda ? 'bioconda::minimap2=2.21' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/minimap2:2.21--h5bf99c6_0' :
'quay.io/biocontainers/minimap2:2.21--h5bf99c6_0' }"
input:
path fasta
output:
path "*.mmi" , emit: index
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
// Extra command-line options come from the pipeline config (task.ext.args).
def args = task.ext.args ?: ''
// `-d` makes minimap2 write the index to the named file.
"""
minimap2 \\
-t $task.cpus \\
-d ${fasta.baseName}.mmi \\
$args \\
$fasta
cat <<-END_VERSIONS > versions.yml
"${task.process}":
minimap2: \$(minimap2 --version 2>&1)
END_VERSIONS
"""
}

View file

@ -0,0 +1,30 @@
name: minimap2_index
description: Provides fasta index required by minimap2 alignment.
keywords:
- index
- fasta
- reference
tools:
- minimap2:
description: |
A versatile pairwise aligner for genomic and spliced nucleotide sequences.
homepage: https://github.com/lh3/minimap2
documentation: https://github.com/lh3/minimap2#uguide
licence: ["MIT"]
input:
- fasta:
type: file
description: |
Reference database in FASTA format.
output:
- index:
type: file
description: Minimap2 fasta index.
pattern: "*.mmi"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@yuukiiwa"
- "@drpatelh"

View file

@ -0,0 +1,56 @@
// Convert an alignment file to gzipped FASTQ with `samtools bam2fq`.
//
// Inputs:
//   meta, inputbam  sample metadata map and the alignment file to convert
//   split           true  -> write four files: `_1`, `_2`, `_other`, `_singleton`
//                   false -> write one `${prefix}_interleaved.fq.gz`
// Outputs: `reads` (every `*.fq.gz` produced) and `versions`.
process SAMTOOLS_BAM2FQ {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(inputbam)
val split
output:
tuple val(meta), path("*.fq.gz"), emit: reads
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
// Extra options and the output file prefix come from the pipeline config.
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// Split mode: samtools itself writes the four gzipped output files.
if (split){
"""
samtools \\
bam2fq \\
$args \\
-@ $task.cpus \\
-1 ${prefix}_1.fq.gz \\
-2 ${prefix}_2.fq.gz \\
-0 ${prefix}_other.fq.gz \\
-s ${prefix}_singleton.fq.gz \\
$inputbam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
// Single-file mode: samtools writes FASTQ to stdout and gzip compresses it.
} else {
"""
samtools \\
bam2fq \\
$args \\
-@ $task.cpus \\
$inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}
}

View file

@ -0,0 +1,55 @@
name: samtools_bam2fq
description: |
The module uses bam2fq method from samtools to
convert a SAM, BAM or CRAM file to FASTQ format
keywords:
- bam2fq
- samtools
- fastq
tools:
- samtools:
description: Tools for dealing with SAM, BAM and CRAM files
homepage: None
documentation: http://www.htslib.org/doc/1.1/samtools.html
tool_dev_url: None
doi: ""
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- inputbam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- split:
type: boolean
description: |
TRUE/FALSE value to indicate if reads should be separated into
/1, /2 and if present other, or singleton.
Note: choosing TRUE will generate 4 different files.
Choosing FALSE will produce a single file, which will be interleaved in case
the input contains paired reads.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: |
FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
or a single interleaved .fq.gz file if the user chooses not to split the reads.
pattern: "*.fq.gz"
authors:
- "@lescai"

View file

@ -0,0 +1,56 @@
// Filter or convert an alignment file with `samtools view`.
//
// Inputs:
//   meta, input, index  sample metadata map, alignment file and its index
//                       (the index is staged but not referenced by the command)
//   fasta               reference FASTA; may be empty
// Outputs: optional `bam` / `cram` channels and `versions`.
// The result is written to `${prefix}.<extension of input>` via stdout redirection,
// so the on-disk format is whatever samtools prints given `args`.
process SAMTOOLS_VIEW {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(input), path(index)
path fasta
output:
tuple val(meta), path("*.bam") , emit: bam , optional: true
tuple val(meta), path("*.cram"), emit: cram, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
// `args` is placed before the input file, `args2` after it.
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// With a reference supplied, samtools is told to use it and to write CRAM (-C).
def reference = fasta ? "--reference ${fasta} -C" : ""
// The output keeps the input's extension, so the prefix must differ from the input's
// base name; otherwise the redirect would target the staged input file.
def file_type = input.getExtension()
if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
samtools \\
view \\
--threads ${task.cpus-1} \\
${reference} \\
$args \\
$input \\
$args2 \\
> ${prefix}.${file_type}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
// Stub: create empty placeholder outputs for both optional channels.
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bam
touch ${prefix}.cram
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

View file

@ -0,0 +1,57 @@
name: samtools_view
description: filter/convert SAM/BAM/CRAM file
keywords:
- view
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- index:
type: optional file
description: BAM.BAI/CRAM.CRAI file
pattern: "*.{bai,crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: filtered/converted BAM/SAM file
pattern: "*.{bam,sam}"
- cram:
type: file
description: filtered/converted CRAM file
pattern: "*.cram"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@joseespinosa"
- "@FriederikeHanssen"

View file

@ -82,12 +82,15 @@ params {
// Host Removal // Host Removal
perform_shortread_hostremoval = false perform_shortread_hostremoval = false
shortread_hostremoval_reference = null perform_longread_hostremoval = false
hostremoval_reference = null
shortread_hostremoval_index = null shortread_hostremoval_index = null
longread_hostremoval_index = null
save_hostremoval_index = false save_hostremoval_index = false
save_hostremoval_mapped = false save_hostremoval_mapped = false
save_hostremoval_unmapped = false save_hostremoval_unmapped = false
// MALT // MALT
run_malt = false run_malt = false
malt_mode = 'BlastN' malt_mode = 'BlastN'

View file

@ -362,7 +362,10 @@
"perform_shortread_hostremoval": { "perform_shortread_hostremoval": {
"type": "boolean" "type": "boolean"
}, },
"shortread_hostremoval_reference": { "perform_longread_hostremoval": {
"type": "boolean"
},
"hostremoval_reference": {
"type": "string", "type": "string",
"default": "None" "default": "None"
}, },
@ -397,6 +400,10 @@
"type": "string", "type": "string",
"default": "tsv", "default": "tsv",
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"] "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"]
},
"longread_hostremoval_index": {
"type": "string",
"default": "None"
} }
} }
} }

View file

@ -0,0 +1,47 @@
//
// Remove host reads via alignment and export off-target reads
//
include { MINIMAP2_INDEX } from '../../modules/nf-core/modules/minimap2/index/main'
include { MINIMAP2_ALIGN } from '../../modules/nf-core/modules/minimap2/align/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/modules/samtools/view/main'
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main'
// Remove host reads from long-read data: align against the host reference with
// minimap2, filter the alignments with samtools view, and convert what remains
// back to FASTQ.
//
// take:
//   reads      [ meta, reads ] channel of long reads
//   reference  host reference FASTA (used to build the index when none is given)
//   index      pre-built minimap2 index (used when --longread_hostremoval_index is set)
// emit:
//   reads      [ meta, fq.gz ] from SAMTOOLS_BAM2FQ
//   versions   versions.yml files of the tools that ran
workflow LONGREAD_HOSTREMOVAL {
    take:
    reads     // [ [ meta ], [ reads ] ]
    reference // /path/to/fasta
    index     // /path/to/index

    main:
    ch_versions = Channel.empty()

    // Build the index from the reference unless the user supplied one.
    if ( !params.longread_hostremoval_index ) {
        ch_minimap2_index = MINIMAP2_INDEX ( reference ).index
        ch_versions       = ch_versions.mix( MINIMAP2_INDEX.out.versions )
    } else {
        ch_minimap2_index = index
    }

    // bam_format = true: sorted BAM output; both CIGAR switches stay off.
    MINIMAP2_ALIGN ( reads, ch_minimap2_index, true, false, false )
    ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions.first() )

    // SAMTOOLS_VIEW takes [ meta, input, index ]; no BAM index is passed here.
    ch_minimap2_mapped = MINIMAP2_ALIGN.out.bam
        .map { meta, bam -> [ meta, bam, [] ] }

    // Which reads are kept is decided by SAMTOOLS_VIEW's ext.args in the pipeline
    // config; the empty second argument means "no reference FASTA".
    SAMTOOLS_VIEW ( ch_minimap2_mapped, [] )
    ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )

    // split = false: one `*_interleaved.fq.gz` per input.
    SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false )
    ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() )

    emit:
    reads    = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
    versions = ch_versions               // channel: [ versions.yml ]
}

View file

@ -11,7 +11,7 @@ WorkflowTaxprofiler.initialise(params, log)
// TODO nf-core: Add all file path parameters for the pipeline to the list below // TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist // Check input path parameters to see if they exist
// Every user-supplied path parameter that must exist before the run starts.
// The long-read host-removal index is included alongside the short-read one.
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
                            params.shortread_hostremoval_index, params.longread_hostremoval_index,
                            params.multiqc_config
                        ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
@ -22,11 +22,12 @@ if (params.databases) { ch_databases = file(params.databases) } else { exit 1, '
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
// Host removal (short- or long-read) cannot run without a reference FASTA.
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
if (params.perform_longread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_longread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
// An index without its reference is a usage error. The check reads the two index
// parameters that actually exist; `params.hostremoval_reference_index` is not
// defined anywhere, so the former condition could never be true.
if (!params.hostremoval_reference && (params.shortread_hostremoval_index || params.longread_hostremoval_index)) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index or --longread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." }

if (params.hostremoval_reference       ) { ch_reference = file(params.hostremoval_reference) }
if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
if (params.longread_hostremoval_index  ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] }
/* /*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -52,6 +53,7 @@ include { DB_CHECK } from '../subworkflows/local/db_check'
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval'
include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval'
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
include { PROFILING } from '../subworkflows/local/profiling' include { PROFILING } from '../subworkflows/local/profiling'
@ -141,16 +143,23 @@ workflow TAXPROFILER {
*/ */
if ( params.perform_shortread_hostremoval ) { if ( params.perform_shortread_hostremoval ) {
ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_reference_index ).reads ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_shortread_reference_index ).reads
ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions) ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions)
} else { } else {
ch_shortreads_hostremoved = ch_shortreads_filtered ch_shortreads_hostremoved = ch_shortreads_filtered
} }
if ( params.perform_longread_hostremoval ) {
ch_longreads_hostremoved = LONGREAD_HOSTREMOVAL ( ch_longreads_preprocessed, ch_reference, ch_longread_reference_index ).reads
ch_versions = ch_versions.mix(LONGREAD_HOSTREMOVAL.out.versions)
} else {
ch_longreads_hostremoved = ch_longreads_preprocessed
}
if ( params.perform_runmerging ) { if ( params.perform_runmerging ) {
ch_reads_for_cat_branch = ch_shortreads_hostremoved ch_reads_for_cat_branch = ch_shortreads_hostremoved
.mix( ch_longreads_preprocessed ) .mix( ch_longreads_hostremoved )
.map { .map {
meta, reads -> meta, reads ->
def meta_new = meta.clone() def meta_new = meta.clone()
@ -182,7 +191,7 @@ workflow TAXPROFILER {
} else { } else {
ch_reads_runmerged = ch_shortreads_hostremoved ch_reads_runmerged = ch_shortreads_hostremoved
.mix( ch_longreads_preprocessed, INPUT_CHECK.out.fasta ) .mix( ch_longreads_hostremoved, INPUT_CHECK.out.fasta )
} }
/* /*