1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 08:42:03 +00:00

Merge branch 'nf-core:dev' into update_usage

This commit is contained in:
Sofia Stamouli 2022-12-20 14:11:05 +01:00 committed by GitHub
commit 502a4a81cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 497 additions and 121 deletions

View file

@ -19,11 +19,20 @@ custom_logo_title: "nf-core/taxprofiler"
run_modules:
- fastqc
- adapterRemoval
- bbduk
- prinseqplusplus
- fastp
- filtlong
- bowtie2
- minimap2
- samtools
- kraken
- kaiju
- metaphlan
- diamond
- malt
- motus
- porechop
- custom_content
#extra_fn_clean_exts:
@ -36,16 +45,41 @@ top_modules:
name: "FastQC (pre-Trimming)"
path_filters:
- "*raw_*fastqc.zip"
- "fastqc":
name: "Falco (pre-Trimming)"
path_filters:
- "*_raw_falco_*_report.html"
- "fastp"
- "adapterRemoval"
- "porechop"
- "fastqc":
name: "FastQC (post-Trimming)"
path_filters:
- "*raw_*processed.zip"
- "*_processed_*fastqc.zip"
- "fastqc":
name: "Falco (post-Trimming)"
path_filters:
- "*_processed_falco_*_report.html"
- "bbduk"
- "prinseqplusplus"
- "filtlong"
- "bowtie2":
name: "bowtie2"
- "samtools":
name: "Samtools Stats"
- "kraken":
name: "Kraken"
path_filters:
- "*.kraken2.report.txt"
- "*.kraken2.kraken2.report.txt"
- "kraken":
name: "Bracken"
anchor: "bracken"
target: "Bracken"
doi: "10.7717/peerj-cs.104"
info: "Estimates species abundances in metagenomics samples by probabilistically re-distributing reads in the taxonomic tree."
extra: "Note: plot title will say Kraken2 due to the first step of bracken producing the same output format as Kraken. Abundance information is currently not supported in MultiQC."
path_filters:
- "*.bracken.kraken2.report.txt"
- "kraken":
name: "Centrifuge"
anchor: "centrifuge"
@ -55,3 +89,171 @@ top_modules:
extra: "Note: plot title will say Kraken2 due to Centrifuge producing the same output format as Kraken. If activated, see the actual Kraken2 results in the section above."
path_filters:
- "*.centrifuge.txt"
- "malt":
name: "MALT"
- "diamond"
- "kaiju":
name: "Kaiju"
- "motus"
#It is not possible to set placement for custom kraken and centrifuge columns.
table_columns_placement:
FastQC (pre-Trimming):
total_sequences: 100
avg_sequence_length: 110
percent_duplicates: 120
percent_gc: 130
percent_fails: 140
Falco (pre-Trimming):
total_sequences: 200
avg_sequence_length: 210
percent_duplicates: 220
percent_gc: 230
percent_fails: 240
fastp:
pct_adapter: 300
pct_surviving: 310
pct_duplication: 320
after_filtering_gc_content: 330
after_filtering_q30_rate: 340
after_filtering_q30_bases: 350
Adapter Removal:
aligned_total: 360
percent_aligned: 370
percent_collapsed: 380
percent_discarded: 390
FastQC (post-Trimming):
total_sequences: 400
avg_sequence_length: 410
percent_duplicates: 420
percent_gc: 430
percent_fails: 440
Falco (post-Trimming):
total_sequences: 500
avg_sequence_length: 510
percent_duplicates: 520
percent_gc: 530
percent_fails: 540
bowtie2:
overall_alignment_rate: 600
Samtools Stats:
raw_total_sequences: 700
reads_mapped: 710
reads_mapped_percent: 720
reads_properly_paired_percent: 730
non-primary_alignments: 740
reads_MQ0_percent: 750
error_rate: 760
MALT:
Num. of queries: 1000
Total reads: 1100
Mappability: 1200
Assig. Taxonomy: 1300
Taxonomic assignment success: 1400
Kaiju:
assigned: 2000
"% Assigned": 2100
"% Unclassified": 2200
table_columns_visible:
FastQC (pre-Trimming):
total_sequences: True
avg_sequence_length: True
percent_duplicates: True
percent_gc: True
percent_fails: False
Falco (pre-Trimming):
total_sequences: True
avg_sequence_length: True
percent_duplicates: True
percent_gc: True
percent_fails: False
fastp:
pct_adapter: True
pct_surviving: True
pct_duplication: False
after_filtering_gc_content: False
after_filtering_q30_rate: False
after_filtering_q30_bases: False
Adapter Removal:
aligned_total: True
percent_aligned: True
percent_collapsed: True
percent_discarded: False
FastQC (post-Trimming):
total_sequences: True
avg_sequence_length: True
percent_duplicates: False
percent_gc: False
percent_fails: False
Falco (post-Trimming):
total_sequences: True
avg_sequence_length: True
percent_duplicates: False
percent_gc: False
percent_fails: False
bowtie2:
overall_alignment_rate: True
Samtools Stats:
raw_total_sequences: True
reads_mapped: True
reads_mapped_percent: True
reads_properly_paired_percent: False
non-primary_alignments: False
reads_MQ0_percent: False
error_rate: False
Kraken:
"% Unclassified": True
"% Top 5": False
Bracken:
"% Unclassified": True
"% Top 5": False
Centrifuge:
"% Unclassified": True
"% Top 5": False
MALT:
Num. of queries: True
Total reads: True
Mappability: True
Assig. Taxonomy: False
Taxonomic assignment success: True
Kaiju:
assigned: False
"% Assigned": False
"% Unclassified": True
table_columns_name:
FastQC (pre-Trimming):
total_sequences: "Nr. Input Reads"
avg_sequence_length: "Length Input Reads"
percent_gc: "% GC Input Reads"
percent_duplicates: "% Dups Input Reads"
percent_fails: "% Failed Input Reads"
Falco (pre-Trimming):
total_sequences: "Nr. Input Reads"
avg_sequence_length: "Length Input Reads"
percent_gc: "% GC Input Reads"
percent_duplicates: "% Dups Input Reads"
percent_fails: "% Failed Input Reads"
FastQC (post-Trimming):
total_sequences: "Nr. Processed Reads"
avg_sequence_length: "Length Processed Reads"
percent_gc: "% GC Processed Reads"
percent_duplicates: "% Dups Processed Reads"
percent_fails: "% Failed Processed Reads"
Falco (post-Trimming):
total_sequences: "Nr. Processed Reads"
avg_sequence_length: "Length Processed Reads"
percent_gc: "% GC Processed Reads"
percent_duplicates: "% Dups Processed Reads"
percent_fails: "% Failed Processed Reads"
Samtools Stats:
raw_total_sequences: "Nr. Reads Into Mapping"
reads_mapped: "Nr. Mapped Reads"
reads_mapped_percent: "% Mapped Reads"
extra_fn_clean_exts:
- ".kraken2.kraken2.report.txt"
- ".centrifuge.txt"
- ".bracken.kraken2.report.txt"
- ".settings"

View file

@ -12,14 +12,6 @@
process {
withName: DATABASE_CHECK {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: FASTQC {
ext.args = '--quiet'
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
@ -41,7 +33,7 @@ process {
}
withName: FALCO {
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
ext.prefix = { "${meta.id}_${meta.run_accession}_raw_falco" }
publishDir = [
path: { "${params.outdir}/falco/raw" },
mode: params.publish_dir_mode,
@ -50,7 +42,7 @@ process {
}
withName: FALCO_PROCESSED {
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
ext.prefix = { "${meta.id}_${meta.run_accession}_processed_falco" }
publishDir = [
path: { "${params.outdir}/falco/processed" },
mode: params.publish_dir_mode,
@ -69,10 +61,17 @@ process {
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/fastp" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
],
[
path: { "${params.outdir}/fastp" },
mode: params.publish_dir_mode,
pattern: '*.{log,html,json}'
]
]
}
@ -90,10 +89,17 @@ process {
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/fastp" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
],
[
path: { "${params.outdir}/fastp" },
mode: params.publish_dir_mode,
pattern: '*.{log,html,json}'
]
]
}
@ -106,10 +112,17 @@ process {
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/adapterremoval" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
],
[
path: { "${params.outdir}/adapterremoval" },
mode: params.publish_dir_mode,
pattern: '*.settings'
]
]
}
@ -125,20 +138,34 @@ process {
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/adapterremoval" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
],
[
path: { "${params.outdir}/adapterremoval" },
mode: params.publish_dir_mode,
pattern: '*.settings'
]
]
}
withName: PORECHOP_PORECHOP {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/porechop" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
],
[
path: { "${params.outdir}/porechop" },
mode: params.publish_dir_mode,
pattern: '*.log'
]
]
}
@ -151,10 +178,17 @@ process {
.join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" }
publishDir = [
[
path: { "${params.outdir}/filtlong" },
mode: params.publish_dir_mode,
pattern: '*.{fastq.gz,log}',
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
],
[
path: { "${params.outdir}/filtlong" },
mode: params.publish_dir_mode,
pattern: '*.log'
]
]
}
@ -248,10 +282,17 @@ process {
].join(' ').trim()
ext.prefix = { "${meta.id}-${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/bbduk/" },
mode: params.publish_dir_mode,
pattern: '*.{fastq.gz,log}',
enabled: params.save_complexityfiltered_reads
],
[
path: { "${params.outdir}/bbduk/" },
mode: params.publish_dir_mode,
pattern: '*.log'
]
]
}
@ -263,10 +304,17 @@ process {
].join(' ').trim()
ext.prefix = { "${meta.id}-${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/prinseqplusplus/" },
mode: params.publish_dir_mode,
pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz,log}',
pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz}',
enabled: params.save_complexityfiltered_reads
],
[
path: { "${params.outdir}/prinseqplusplus/" },
mode: params.publish_dir_mode,
pattern: '*.log'
]
]
}
@ -303,7 +351,7 @@ process {
withName: KRAKEN2_KRAKEN2 {
ext.args = params.kraken2_save_minimizers ? { "${meta.db_params} --report-minimizer-data" } : { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
ext.prefix = params.perform_runmerging ? { meta.tool == "bracken" ? "${meta.id}-${meta.db_name}.bracken" : "${meta.id}-${meta.db_name}" } : { meta.tool == "bracken" ? "${meta.id}-${meta.run_accession}-${meta.db_name}.bracken" : "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/kraken2/${meta.db_name}/" },
mode: params.publish_dir_mode,
@ -313,7 +361,7 @@ process {
withName: BRACKEN_BRACKEN {
errorStrategy = 'ignore'
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.bracken" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.bracken" }
publishDir = [
path: { "${params.outdir}/bracken/${meta.db_name}/" },
mode: params.publish_dir_mode,
@ -321,12 +369,21 @@ process {
]
}
withName: KRAKENTOOLS_COMBINEKREPORTS {
withName: BRACKEN_COMBINEBRACKENOUTPUTS {
ext.prefix = { "bracken_${meta.id}_combined_reports" }
publishDir = [
path: { "${params.outdir}/bracken/" },
mode: params.publish_dir_mode,
pattern: '*.txt'
]
}
withName: KRAKENTOOLS_COMBINEKREPORTS_KRAKEN {
ext.prefix = { "kraken2_${meta.id}_combined_reports" }
publishDir = [
path: { "${params.outdir}/kraken2/" },
mode: params.publish_dir_mode,
pattern: '*.{txt}'
pattern: '*.txt'
]
}

View file

@ -30,6 +30,11 @@
"git_sha": "8cab56516076b23c6f8eb1ac20ba4ce9692c85e1",
"installed_by": ["modules"]
},
"bracken/combinebrackenoutputs": {
"branch": "master",
"git_sha": "9c87d5fdad182590a370ea43a4ecebd200a6f6fb",
"installed_by": ["modules"]
},
"cat/fastq": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
@ -68,7 +73,8 @@
"falco": {
"branch": "master",
"git_sha": "fc959214036403ad83efe7a41d43d0606c445cda",
"installed_by": ["modules"]
"installed_by": ["modules"],
"patch": "modules/nf-core/falco/falco.diff"
},
"fastp": {
"branch": "master",
@ -167,12 +173,12 @@
},
"motus/merge": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
"git_sha": "3fce766123e71e82fb384db7d07b59180baa9ee9",
"installed_by": ["modules"]
},
"motus/profile": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
"git_sha": "3fce766123e71e82fb384db7d07b59180baa9ee9",
"installed_by": ["modules"]
},
"multiqc": {

View file

@ -1,29 +0,0 @@
// Copies the supplied database sheet into `database_sheet.valid.csv` and records
// the Python version of the container in `versions.yml`.
// Inputs:  databasesheet - path to the database sheet.
// Outputs: csv      - any `*.csv` file produced in the work directory
//          versions - `versions.yml`
process DATABASE_CHECK {
tag "$databasesheet"
label 'process_single'
conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
// Use the Singularity image unless the task asks to pull the Docker container instead.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.8.3' :
'quay.io/biocontainers/python:3.8.3' }"
input:
path databasesheet
output:
path '*.csv' , emit: csv
path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
script: // No validation happens here: the sheet is appended unchanged to database_sheet.valid.csv with `cat`
"""
cat $databasesheet >> database_sheet.valid.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
"""
}

View file

@ -0,0 +1,37 @@
// Runs `combine_bracken_outputs.py` on the staged input files and writes the
// result to `<prefix>.txt`.
// Inputs:  meta  - map whose `id` is used as the task tag and the default prefix
//          input - file(s) passed to the script via `--files`
// Outputs: txt      - tuple of meta and the `*.txt` file(s) produced
//          versions - `versions.yml`
process BRACKEN_COMBINEBRACKENOUTPUTS {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::bracken=2.7" : null)
// Use the Singularity image unless the task asks to pull the Docker container instead.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bracken:2.7--py39hc16433a_0':
'quay.io/biocontainers/bracken:2.7--py39hc16433a_0' }"
input:
tuple val(meta), path(input)
output:
tuple val(meta), path("*.txt"), emit: txt
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
// Extra command-line arguments and the output prefix can be overridden through task.ext.
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// WARN: Version information not provided by tool on CLI.
// Please update version string below when bumping container versions.
// The hard-coded value below is what gets written to versions.yml.
def VERSION = '2.7'
"""
combine_bracken_outputs.py \\
$args \\
--files ${input} \\
-o ${prefix}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
combine_bracken_output: ${VERSION}
END_VERSIONS
"""
}

View file

@ -0,0 +1,41 @@
name: "bracken_combinebrackenoutputs"
description: Combine output of metagenomic samples analyzed by bracken.
keywords:
- sort
tools:
- "bracken":
description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.
homepage: https://ccb.jhu.edu/software/bracken/
documentation: https://ccb.jhu.edu/software/bracken/index.shtml?t=manual
tool_dev_url: https://github.com/jenniferlu717/Bracken
doi: "10.7717/peerj-cs.104"
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: List of output files from bracken
pattern: "*"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: Combined output in table format
pattern: "*.txt"
authors:
- "@jfy133"

16
modules/nf-core/falco/falco.diff generated Normal file
View file

@ -0,0 +1,16 @@
Changes in module 'nf-core/falco'
--- modules/nf-core/falco/main.nf
+++ modules/nf-core/falco/main.nf
@@ -33,7 +33,9 @@
"""
} else {
"""
- falco $args --threads $task.cpus ${reads}
+ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+ [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+ falco $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
************************************************************

View file

@ -33,7 +33,9 @@ process FALCO {
"""
} else {
"""
falco $args --threads $task.cpus ${reads}
[ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
[ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
falco $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -1,13 +1,11 @@
VERSION = '3.0.1'
process MOTUS_MERGE {
tag "$meta.id"
label 'process_single'
conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
conda (params.enable_conda ? "bioconda::motus=3.0.3" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/motus:3.0.3--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.3--pyhdfd78af_0' }"
input:
tuple val(meta), path(input)

View file

@ -14,7 +14,7 @@ tools:
homepage: "https://motu-tool.org/"
documentation: "https://github.com/motu-tool/mOTUs/wiki"
tool_dev_url: "https://github.com/motu-tool/mOTUs"
doi: "10.1038/s41467-019-08844-4"
doi: "10.1186/s40168-022-01410-z"
licence: "['GPL v3']"
input:

View file

@ -2,10 +2,10 @@ process MOTUS_PROFILE {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
conda (params.enable_conda ? "bioconda::motus=3.0.3" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/motus:3.0.3--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.3--pyhdfd78af_0' }"
input:
tuple val(meta), path(reads)

View file

@ -11,7 +11,7 @@ tools:
homepage: "https://motu-tool.org/"
documentation: "https://github.com/motu-tool/mOTUs/wiki"
tool_dev_url: "https://github.com/motu-tool/mOTUs"
doi: "10.1038/s41467-019-08844-4"
doi: "10.1186/s40168-022-01410-z"
licence: "['GPL v3']"
input:

View file

@ -2,7 +2,6 @@
// Check input samplesheet and get read channels
//
include { DATABASE_CHECK } from '../../modules/local/database_check'
include { UNTAR } from '../../modules/nf-core/untar/main'
workflow DB_CHECK {
@ -10,15 +9,27 @@ workflow DB_CHECK {
dbsheet // file: /path/to/dbsheet.csv
main:
ch_versions = Channel.empty()
// TODO: make database sheet check
// Checks:
// 1) no duplicates,
// 2) args do not have quotes, e.g. just `,,` and NOT `,"",`
parsed_samplesheet = DATABASE_CHECK ( dbsheet )
.csv
// Special check _between_ rows, for which we must group rows together.
// Note: this runs in parallel with the within-row validity checks, but we assume it finishes faster and therefore fails first.
Channel.fromPath(dbsheet)
.splitCsv ( header:true, sep:',' )
.map { create_db_channels(it) }
.map {[it.tool, it.db_name] }
.groupTuple()
.map {
tool, db_name ->
def unique_names = db_name.unique(false)
if ( unique_names.size() < db_name.size() ) exit 1, "[nf-core/taxprofiler] ERROR: Each database for a tool must have a unique name, duplicated detected. Tool: ${tool}, Database name: ${unique_names}"
}
// normal checks for within-row validity, so can be moved to separate functions
parsed_samplesheet = Channel.fromPath(dbsheet)
.splitCsv ( header:true, sep:',' )
.map {
validate_db_rows(it)
create_db_channels(it)
}
ch_dbs_for_untar = parsed_samplesheet
.branch {
@ -29,12 +40,32 @@ workflow DB_CHECK {
// TODO Filter to only run UNTAR on DBs of tools actually using?
// TODO make optional whether to save
UNTAR ( ch_dbs_for_untar.untar )
ch_versions = ch_versions.mix(UNTAR.out.versions.first())
ch_final_dbs = ch_dbs_for_untar.skip.mix( UNTAR.out.untar )
emit:
dbs = ch_final_dbs // channel: [ val(meta), [ db ] ]
versions = DATABASE_CHECK.out.versions.mix(UNTAR.out.versions.first()) // channel: [ versions.yml ]
versions = ch_versions // channel: [ versions.yml ]
}
// Validate a single parsed row of the database sheet.
//
// row: map of column header -> value for one row of the comma-separated sheet.
//
// Terminates the run (exit code 1) with a descriptive message when:
//   - the row has fewer than four columns,
//   - any of the expected column headers is missing,
//   - the `tool` value is not one of the supported profilers,
//   - the `db_params` value contains a single or double quote.
// Returns nothing meaningful; it is called for its checks only.
def validate_db_rows(LinkedHashMap row){

    // check minimum number of columns
    if (row.size() < 4) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed row (e.g. missing column). See documentation for more information. Error in: ${row}"

    // all columns there
    // (the message previously interpolated an undefined variable, which would have
    // raised a missing-property error instead of printing the expected headers)
    def expected_headers = ['tool', 'db_name', 'db_params', 'db_path']
    if ( !row.keySet().containsAll(expected_headers) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed column names. Please check input CSV. Column names should be: ${expected_headers.join(", ")}"

    // valid tools specified
    def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "motus" ]
    if ( !expected_tools.contains(row.tool) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid tool name. Please see documentation for all supported profilers. Error in: ${row}"

    // detect quotes in params; safe navigation so an empty/missing db_params
    // value does not raise a NullPointerException before the check can run
    if ( row.db_params?.contains('"') ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}"
    if ( row.db_params?.contains("'") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}"
}
def create_db_channels(LinkedHashMap row) {
@ -45,9 +76,11 @@ def create_db_channels(LinkedHashMap row) {
def array = []
if (!file(row.db_path, type: 'dir').exists()) {
exit 1, "ERROR: Please check input samplesheet -> database could not be found!\n${row.db_path}"
exit 1, "ERROR: Please check input samplesheet -> database path could not be found!\n${row.db_path}"
}
array = [ meta, file(row.db_path) ]
return array
}

View file

@ -41,14 +41,14 @@ workflow PROFILING {
}
.combine(databases)
.branch {
malt: it[2]['tool'] == 'malt'
kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken
metaphlan3: it[2]['tool'] == 'metaphlan3'
centrifuge: it[2]['tool'] == 'centrifuge'
kaiju: it[2]['tool'] == 'kaiju'
diamond: it[2]['tool'] == 'diamond'
motus: it[2]['tool'] == 'motus'
kaiju: it[2]['tool'] == 'kaiju'
kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken
krakenuniq: it[2]['tool'] == 'krakenuniq'
malt: it[2]['tool'] == 'malt'
metaphlan3: it[2]['tool'] == 'metaphlan3'
motus: it[2]['tool'] == 'motus'
unknown: true
}

View file

@ -2,6 +2,7 @@
// Standardise output files e.g. aggregation
//
include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main'
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
@ -25,10 +26,11 @@ workflow STANDARDISATION_PROFILES {
*/
ch_input_profiles = profiles
.branch {
motus: it[0]['tool'] == 'motus'
kraken2: it[0]['tool'] == 'kraken2'
bracken: it[0]['tool'] == 'bracken'
centrifuge: it[0]['tool'] == 'centrifuge'
kraken2: it[0]['tool'] == 'kraken2'
metaphlan3: it[0]['tool'] == 'metaphlan3'
motus: it[0]['tool'] == 'motus'
unknown: true
}
@ -49,6 +51,17 @@ workflow STANDARDISATION_PROFILES {
Standardise and aggregate
*/
// Bracken
ch_profiles_for_bracken = ch_input_profiles.bracken
.map { [it[0]['db_name'], it[1]] }
.groupTuple()
.map {
[[id:it[0]], it[1]]
}
BRACKEN_COMBINEBRACKENOUTPUTS ( ch_profiles_for_bracken )
// CENTRIFUGE
// Collect and replace id for db_name for prefix

View file

@ -25,7 +25,7 @@ if ( params.input ) {
exit 1, "Input samplesheet, or PEP config and base directory not specified"
}
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
if (params.databases) { ch_databases = file(params.databases, checkIfExists: true) } else { exit 1, 'Input database sheet not specified!' }
if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_qc_includeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging is not turned on. Please specify --shortread_qc_mergepairs"