mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-12-22 15:28:16 +00:00
Merge branch 'dev' into metaphlan3/mergemetaphlantables
This commit is contained in:
commit
cdb21d17b3
12 changed files with 203 additions and 55 deletions
|
@ -12,6 +12,8 @@
|
|||
|
||||
## Introduction
|
||||
|
||||
> ⚠️ This pipeline is still under development! While the pipeline is usable, not all functionality will be available!
|
||||
|
||||
<!-- TODO nf-core: Write a 1-2 sentence summary of what data the pipeline is for and what it does -->
|
||||
|
||||
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic profiling of shotgun metagenomic data. It allows for in-parallel profiling with multiple profiling tools against multiple databases, and produces standardised output tables.
|
||||
|
|
|
@ -294,6 +294,15 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: KRAKENTOOLS_COMBINEKREPORTS {
|
||||
ext.prefix = { "kraken2_${meta.id}_combined_reports" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/kraken2/" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{txt}'
|
||||
]
|
||||
}
|
||||
|
||||
withName: KRONA_CLEANUP {
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
publishDir = [
|
||||
|
@ -367,6 +376,15 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE {
|
||||
ext.prefix = { "centrifuge_${meta.id}_combined_reports" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/centrifuge/" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{txt}'
|
||||
]
|
||||
}
|
||||
|
||||
withName: KAIJU_KAIJU {
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
publishDir = [
|
||||
|
@ -378,7 +396,7 @@ process {
|
|||
}
|
||||
|
||||
withName: KAIJU_KAIJU2TABLE {
|
||||
ext.prefix = { "${meta.id}_combined_reports" }
|
||||
ext.prefix = { "kaiju_${meta.id}_combined_reports" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/kaiju/" },
|
||||
mode: params.publish_dir_mode,
|
||||
|
|
|
@ -410,3 +410,13 @@ We recommend adding the following line to your environment to limit this (typica
|
|||
```bash
|
||||
NXF_OPTS='-Xms1g -Xmx4g'
|
||||
```
|
||||
|
||||
## Troubleshooting and FAQs
|
||||
|
||||
### I get a warning during centrifuge_kreport process with exit status 255.
|
||||
|
||||
When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty.
|
||||
|
||||
When trying to convert this to a kraken-style report, the conversion tool will exit with a status code `255`, and provide a `WARN`.
|
||||
|
||||
This is **neither** an error nor a failure of the pipeline; it just means your sample has no hits to the provided database when using centrifuge.
|
||||
|
|
|
@ -43,8 +43,7 @@
|
|||
},
|
||||
"fastp": {
|
||||
"branch": "master",
|
||||
"git_sha": "7e8ad566883449e7939062b5e2bcf53fc1e0002f",
|
||||
"patch": "modules/nf-core/modules/fastp/fastp.diff"
|
||||
"git_sha": "2c70c1c1951aaf884d2e8d8d9c871db79f7b35aa"
|
||||
},
|
||||
"fastqc": {
|
||||
"branch": "master",
|
||||
|
@ -74,6 +73,10 @@
|
|||
"branch": "master",
|
||||
"git_sha": "409a308ba46284d8ebb48c2c1befd6f6433db3f7"
|
||||
},
|
||||
"krakentools/combinekreports": {
|
||||
"branch": "master",
|
||||
"git_sha": "ee0346b4d14ffdc15ce7e093ca1363cd07c9bd78"
|
||||
},
|
||||
"krakentools/kreport2krona": {
|
||||
"branch": "master",
|
||||
"git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d"
|
||||
|
@ -140,7 +143,7 @@
|
|||
},
|
||||
"untar": {
|
||||
"branch": "master",
|
||||
"git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247"
|
||||
"git_sha": "393dbd6ddafe3f18eac02893dd4a21e4d45de679"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
33
modules/nf-core/modules/fastp/fastp.diff
generated
33
modules/nf-core/modules/fastp/fastp.diff
generated
|
@ -1,33 +0,0 @@
|
|||
Changes in module 'nf-core/modules/fastp'
|
||||
--- modules/nf-core/modules/fastp/main.nf
|
||||
+++ modules/nf-core/modules/fastp/main.nf
|
||||
@@ -33,9 +33,8 @@
|
||||
def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : ''
|
||||
"""
|
||||
[ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
|
||||
- cat ${prefix}.fastq.gz \\
|
||||
- | fastp \\
|
||||
- --stdin \\
|
||||
+
|
||||
+ fastp \\
|
||||
--stdout \\
|
||||
--in1 ${prefix}.fastq.gz \\
|
||||
--thread $task.cpus \\
|
||||
@@ -45,6 +44,7 @@
|
||||
$args \\
|
||||
2> ${prefix}.fastp.log \\
|
||||
| gzip -c > ${prefix}.fastp.fastq.gz
|
||||
+
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
|
||||
@@ -69,6 +69,7 @@
|
||||
--detect_adapter_for_pe \\
|
||||
$args \\
|
||||
2> ${prefix}.fastp.log
|
||||
+
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
||||
************************************************************
|
30
modules/nf-core/modules/fastp/main.nf
generated
30
modules/nf-core/modules/fastp/main.nf
generated
|
@ -26,14 +26,14 @@ process FASTP {
|
|||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
// Added soft-links to original fastqs for consistent naming in MultiQC
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
|
||||
// Added soft-links to original fastqs for consistent naming in MultiQC
|
||||
// Use single ended for interleaved. Add --interleaved_in in config.
|
||||
if (meta.single_end) {
|
||||
def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : ''
|
||||
if ( task.ext.args?.contains('--interleaved_in') ) {
|
||||
"""
|
||||
[ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
|
||||
|
||||
|
||||
fastp \\
|
||||
--stdout \\
|
||||
--in1 ${prefix}.fastq.gz \\
|
||||
|
@ -45,13 +45,32 @@ process FASTP {
|
|||
2> ${prefix}.fastp.log \\
|
||||
| gzip -c > ${prefix}.fastp.fastq.gz
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
|
||||
END_VERSIONS
|
||||
"""
|
||||
} else if (meta.single_end) {
|
||||
"""
|
||||
[ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
|
||||
|
||||
fastp \\
|
||||
--stdout \\
|
||||
--in1 ${prefix}.fastq.gz \\
|
||||
--out1 ${prefix}.fastp.fastq.gz \\
|
||||
--thread $task.cpus \\
|
||||
--json ${prefix}.fastp.json \\
|
||||
--html ${prefix}.fastp.html \\
|
||||
$fail_fastq \\
|
||||
$args \\
|
||||
2> ${prefix}.fastp.log
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
|
||||
END_VERSIONS
|
||||
"""
|
||||
} else {
|
||||
def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
|
||||
def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
|
||||
"""
|
||||
[ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
|
||||
|
@ -69,7 +88,6 @@ process FASTP {
|
|||
--detect_adapter_for_pe \\
|
||||
$args \\
|
||||
2> ${prefix}.fastp.log
|
||||
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
3
modules/nf-core/modules/fastp/meta.yml
generated
3
modules/nf-core/modules/fastp/meta.yml
generated
|
@ -21,7 +21,8 @@ input:
|
|||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
respectively. If you wish to run interleaved paired-end data, supply as single-end data
|
||||
but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
|
||||
- save_trimmed_fail:
|
||||
type: boolean
|
||||
description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
|
||||
|
|
34
modules/nf-core/modules/krakentools/combinekreports/main.nf
generated
Normal file
34
modules/nf-core/modules/krakentools/combinekreports/main.nf
generated
Normal file
|
@ -0,0 +1,34 @@
|
|||
// Combines the staged Kraken-style report files (`kreports`) into a single
// `${prefix}.txt` by calling KrakenTools' `combine_kreports.py`, and records
// the tool version in `versions.yml`.
process KRAKENTOOLS_COMBINEKREPORTS {
|
||||
label 'process_low'
|
||||
|
||||
// Software environment: the conda package and both container images are pinned to krakentools 1.2.
conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
|
||||
// The Singularity image is used only when the engine is singularity and
// `task.ext.singularity_pull_docker_container` is not set; otherwise the quay.io image is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
|
||||
'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"
|
||||
|
||||
input:
|
||||
// meta: map whose `id` supplies the default output prefix; kreports: report file(s) handed to `-r`.
tuple val(meta), path(kreports)
|
||||
|
||||
output:
|
||||
// txt: the files matching `*.txt` in the task directory, emitted together with `meta`.
tuple val(meta), path("*.txt"), emit: txt
|
||||
// versions: the `versions.yml` written by the heredoc at the end of the script.
path "versions.yml", emit: versions
|
||||
|
||||
when:
|
||||
// Run unless `task.ext.when` is explicitly set to a falsy value.
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
// Extra command-line options appended to the combine_kreports.py call (empty when unset).
def args = task.ext.args ?: ''
|
||||
// Stem of the combined report file name; falls back to `meta.id`.
// NOTE(review): assigned without `def`, unlike `args` and `VERSION` — confirm this is intentional.
prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def VERSION = '1.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
|
||||
"""
|
||||
combine_kreports.py \\
|
||||
-r ${kreports} \\
|
||||
-o ${prefix}.txt \\
|
||||
${args}
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
combine_kreports.py: ${VERSION}
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
43
modules/nf-core/modules/krakentools/combinekreports/meta.yml
generated
Normal file
43
modules/nf-core/modules/krakentools/combinekreports/meta.yml
generated
Normal file
|
@ -0,0 +1,43 @@
|
|||
name: krakentools_combinekreports
|
||||
description: Takes multiple Kraken-style report files and combines them into a single report file
|
||||
keywords:
|
||||
- kraken
|
||||
- krakentools
|
||||
- metagenomics
|
||||
- table
|
||||
- combining
|
||||
- merging
|
||||
tools:
|
||||
- krakentools:
|
||||
description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
|
||||
homepage: https://github.com/jenniferlu717/KrakenTools
|
||||
licence: ["GPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- kreports:
|
||||
type: file
|
||||
description: List of kraken-style report files
|
||||
pattern: "*.{txt,kreport}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- txt:
|
||||
type: file
|
||||
description: Combined kreport file of all input files
|
||||
pattern: "*.txt"
|
||||
|
||||
authors:
|
||||
- "@jfy133"
|
23
modules/nf-core/modules/untar/main.nf
generated
23
modules/nf-core/modules/untar/main.nf
generated
|
@ -25,12 +25,23 @@ process UNTAR {
|
|||
"""
|
||||
mkdir output
|
||||
|
||||
tar \\
|
||||
-C output --strip-components 1 \\
|
||||
-xzvf \\
|
||||
$args \\
|
||||
$archive \\
|
||||
$args2
|
||||
## Ensures --strip-components only applied when top level of tar contents is a directory
|
||||
## If just files or multiple directories, place all in output
|
||||
if [[ \$(tar -tzf ${archive} | grep "/\$" | wc -l) -eq 1 ]]; then
|
||||
tar \\
|
||||
-C output --strip-components 1 \\
|
||||
-xzvf \\
|
||||
$args \\
|
||||
$archive \\
|
||||
$args2
|
||||
else
|
||||
tar \\
|
||||
-C output \\
|
||||
-xzvf \\
|
||||
$args \\
|
||||
$archive \\
|
||||
$args2
|
||||
fi
|
||||
|
||||
mv output ${untar}
|
||||
|
||||
|
|
8
modules/nf-core/modules/untar/meta.yml
generated
8
modules/nf-core/modules/untar/meta.yml
generated
|
@ -26,9 +26,9 @@ output:
|
|||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- untar:
|
||||
type: file
|
||||
description:
|
||||
pattern: "*.*"
|
||||
type: directory
|
||||
description: Directory containing contents of archive
|
||||
pattern: "*/"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
|
@ -36,3 +36,5 @@ output:
|
|||
authors:
|
||||
- "@joseespinosa"
|
||||
- "@drpatelh"
|
||||
- "@matthdsm"
|
||||
- "@jfy133"
|
||||
|
|
|
@ -2,9 +2,11 @@
|
|||
// Standardise output files e.g. aggregation
|
||||
//
|
||||
|
||||
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main'
|
||||
include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main'
|
||||
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main'
|
||||
include { KRAKENTOOLS_COMBINEKREPORTS } from '../../modules/nf-core/modules/krakentools/combinekreports/main'
|
||||
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/modules/krakentools/combinekreports/main'
|
||||
include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/modules/metaphlan3/mergemetaphlantables/main'
|
||||
include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main'
|
||||
|
||||
workflow STANDARDISATION_PROFILES {
|
||||
take:
|
||||
|
@ -24,6 +26,8 @@ workflow STANDARDISATION_PROFILES {
|
|||
ch_input_profiles = profiles
|
||||
.branch {
|
||||
motus: it[0]['tool'] == 'motus'
|
||||
kraken2: it[0]['tool'] == 'kraken2'
|
||||
centrifuge: it[0]['tool'] == 'centrifuge'
|
||||
metaphlan3: it[0]['tool'] == 'metaphlan3'
|
||||
unknown: true
|
||||
}
|
||||
|
@ -45,6 +49,23 @@ workflow STANDARDISATION_PROFILES {
|
|||
Standardise and aggregate
|
||||
*/
|
||||
|
||||
// CENTRIFUGE
|
||||
|
||||
// Collect and replace id for db_name for prefix
|
||||
// Have to sort by size to ensure first file actually has hits otherwise
|
||||
// the script fails
|
||||
ch_profiles_for_centrifuge = ch_input_profiles.centrifuge
|
||||
.map { [it[0]['db_name'], it[1]] }
|
||||
.groupTuple(sort: {-it.size()} )
|
||||
.map {
|
||||
[[id:it[0]], it[1]]
|
||||
}
|
||||
|
||||
KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )
|
||||
ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
|
||||
ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions )
|
||||
|
||||
// Kaiju
|
||||
|
||||
// Collect and replace id for db_name for prefix
|
||||
|
@ -60,7 +81,25 @@ workflow STANDARDISATION_PROFILES {
|
|||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
|
||||
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
|
||||
|
||||
// Kraken2
|
||||
|
||||
// Collect and replace id for db_name for prefix
|
||||
// Have to sort by size to ensure first file actually has hits otherwise
|
||||
// the script fails
|
||||
ch_profiles_for_kraken2 = ch_input_profiles.kraken2
|
||||
.map { [it[0]['db_name'], it[1]] }
|
||||
.groupTuple(sort: {-it.size()} )
|
||||
.map {
|
||||
[[id:it[0]], it[1]]
|
||||
}
|
||||
|
||||
KRAKENTOOLS_COMBINEKREPORTS ( ch_profiles_for_kraken2 )
|
||||
ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt )
|
||||
ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS.out.versions )
|
||||
|
||||
// MetaPhlAn3
|
||||
|
||||
ch_profiles_for_metaphlan3 = ch_input_profiles.metaphlan3
|
||||
.map { [it[0]['db_name'], it[1]] }
|
||||
.groupTuple()
|
||||
|
|
Loading…
Reference in a new issue