diff --git a/README.md b/README.md index 672c2a9..ddbaddf 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ ## Introduction +> ⚠️ This pipeline is still under development! While the pipeline is usable, not all functionality will be available! + **nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic profiling of shotgun metagenomic data. It allows for in-parallel profiling with multiple profiling tools against multiple databases, produces standardised output tables. diff --git a/conf/modules.config b/conf/modules.config index 01e0e17..f64c423 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -294,6 +294,15 @@ process { ] } + withName: KRAKENTOOLS_COMBINEKREPORTS { + ext.prefix = { "kraken2_${meta.id}_combined_reports" } + publishDir = [ + path: { "${params.outdir}/kraken2/" }, + mode: params.publish_dir_mode, + pattern: '*.{txt}' + ] + } + withName: KRONA_CLEANUP { ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ @@ -367,6 +376,15 @@ process { ] } + withName: KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE { + ext.prefix = { "centrifuge_${meta.id}_combined_reports" } + publishDir = [ + path: { "${params.outdir}/centrifuge/" }, + mode: params.publish_dir_mode, + pattern: '*.{txt}' + ] + } + withName: KAIJU_KAIJU { ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ @@ -378,7 +396,7 @@ process { } withName: KAIJU_KAIJU2TABLE { - ext.prefix = { "${meta.id}_combined_reports" } + ext.prefix = { "kaiju_${meta.id}_combined_reports" } publishDir = [ path: { "${params.outdir}/kaiju/" }, mode: params.publish_dir_mode, diff --git a/docs/usage.md b/docs/usage.md index 3971c58..8ae5257 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -410,3 +410,13 @@ We recommend adding the following line to your environment to limit this (typica ```bash NXF_OPTS='-Xms1g -Xmx4g' ``` + +## Troubleshooting and FAQs + +### I get a warning during centrifuge_kreport process with exit status 255. + +When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty. + +When trying to convert this to a kraken-style report, the conversion tool will exit with a status code `255`, and provide a `WARN`. + +This is **not** an error nor a failure of the pipeline, just your sample has no hits to the provided database when using centrifuge. diff --git a/modules.json b/modules.json index 7c02b3e..0634112 100644 --- a/modules.json +++ b/modules.json @@ -43,8 +43,7 @@ }, "fastp": { "branch": "master", - "git_sha": "7e8ad566883449e7939062b5e2bcf53fc1e0002f", - "patch": "modules/nf-core/modules/fastp/fastp.diff" + "git_sha": "2c70c1c1951aaf884d2e8d8d9c871db79f7b35aa" }, "fastqc": { "branch": "master", @@ -74,6 +73,10 @@ "branch": "master", "git_sha": "409a308ba46284d8ebb48c2c1befd6f6433db3f7" }, + "krakentools/combinekreports": { + "branch": "master", + "git_sha": "ee0346b4d14ffdc15ce7e093ca1363cd07c9bd78" + }, "krakentools/kreport2krona": { "branch": "master", "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" @@ -140,7 +143,7 @@ }, "untar": { "branch": "master", - "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247" + "git_sha": "393dbd6ddafe3f18eac02893dd4a21e4d45de679" } } } diff --git a/modules/nf-core/modules/fastp/fastp.diff b/modules/nf-core/modules/fastp/fastp.diff deleted file mode 100644 index a05d486..0000000 --- a/modules/nf-core/modules/fastp/fastp.diff +++ /dev/null @@ -1,33 +0,0 @@ -Changes in module 'nf-core/modules/fastp' ---- modules/nf-core/modules/fastp/main.nf -+++ modules/nf-core/modules/fastp/main.nf -@@ -33,9 +33,8 @@ - def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' - """ - [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz -- cat ${prefix}.fastq.gz \\ -- | fastp \\ -- --stdin \\ -+ -+ fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ - --thread $task.cpus \\ -@@ -45,6 +44,7 @@ - $args \\ - 2> ${prefix}.fastp.log \\ - | gzip -c > ${prefix}.fastp.fastq.gz -+ - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -@@ -69,6 +69,7 @@ - --detect_adapter_for_pe \\ - $args \\ - 2> ${prefix}.fastp.log -+ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -************************************************************ diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/modules/fastp/main.nf index abbdbad..11ea4db 100644 --- a/modules/nf-core/modules/fastp/main.nf +++ b/modules/nf-core/modules/fastp/main.nf @@ -26,14 +26,14 @@ process FASTP { script: def args = task.ext.args ?: '' - // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC // Use single ended for interleaved. Add --interleaved_in in config. - if (meta.single_end) { - def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' + if ( task.ext.args?.contains('--interleaved_in') ) { """ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - + fastp \\ --stdout \\ --in1 ${prefix}.fastq.gz \\ @@ -45,13 +45,32 @@ process FASTP { 2> ${prefix}.fastp.log \\ | gzip -c > ${prefix}.fastp.fastq.gz + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log + cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") END_VERSIONS """ } else { - def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz @@ -69,7 +88,6 @@ process FASTP { --detect_adapter_for_pe \\ $args \\ 2> ${prefix}.fastp.log - cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/modules/fastp/meta.yml index 598c336..2368fde 100644 --- a/modules/nf-core/modules/fastp/meta.yml +++ b/modules/nf-core/modules/fastp/meta.yml @@ -21,7 +21,8 @@ input: type: file description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` diff --git a/modules/nf-core/modules/krakentools/combinekreports/main.nf b/modules/nf-core/modules/krakentools/combinekreports/main.nf new file mode 100644 index 0000000..849f39c --- /dev/null +++ b/modules/nf-core/modules/krakentools/combinekreports/main.nf @@ -0,0 +1,34 @@ +process KRAKENTOOLS_COMBINEKREPORTS { + label 'process_low' + + conda (params.enable_conda ? "bioconda::krakentools=1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0': + 'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }" + + input: + tuple val(meta), path(kreports) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + combine_kreports.py \\ + -r ${kreports} \\ + -o ${prefix}.txt \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + combine_kreports.py: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/krakentools/combinekreports/meta.yml b/modules/nf-core/modules/krakentools/combinekreports/meta.yml new file mode 100644 index 0000000..213fc8c --- /dev/null +++ b/modules/nf-core/modules/krakentools/combinekreports/meta.yml @@ -0,0 +1,43 @@ +name: krakentools_combinekreports +description: Takes a Kraken report file and prints out a krona-compatible TEXT file +keywords: + - kraken + - krakentools + - metagenomics + - table + - combining + - merging +tools: + - krakentools: + description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs. + homepage: https://github.com/jenniferlu717/KrakenTools + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - kreports: + type: file + description: List of kraken-style report files + pattern: "*.{txt,kreport}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - txt: + type: file + description: Combined kreport file of all input files + pattern: "*.txt" + +authors: + - "@jfy133" diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf index 4128652..007871b 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/modules/untar/main.nf @@ -25,12 +25,23 @@ process UNTAR { """ mkdir output - tar \\ - -C output --strip-components 1 \\ - -xzvf \\ - $args \\ - $archive \\ - $args2 + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in output + if [[ \$(tar -tzf ${archive} | grep "/\$" | wc -l) -eq 1 ]]; then + tar \\ + -C output --strip-components 1 \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C output \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + fi mv output ${untar} diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml index d426919..ea7a3f3 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ b/modules/nf-core/modules/untar/meta.yml @@ -26,9 +26,9 @@ output: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - untar: - type: file - description: - pattern: "*.*" + type: directory + description: Directory containing contents of archive + pattern: "*/" - versions: type: file description: File containing software versions @@ -36,3 +36,5 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index a557bff..f63c10a 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -2,9 +2,11 @@ // Standardise output files e.g. aggregation // -include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' -include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main' +include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' +include { KRAKENTOOLS_COMBINEKREPORTS } from '../../modules/nf-core/modules/krakentools/combinekreports/main' +include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/modules/krakentools/combinekreports/main' include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/modules/metaphlan3/mergemetaphlantables/main' +include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main' workflow STANDARDISATION_PROFILES { take: @@ -24,6 +26,8 @@ workflow STANDARDISATION_PROFILES { ch_input_profiles = profiles .branch { motus: it[0]['tool'] == 'motus' + kraken2: it[0]['tool'] == 'kraken2' + centrifuge: it[0]['tool'] == 'centrifuge' metaphlan3: it[0]['tool'] == 'metaphlan3' unknown: true } @@ -45,6 +49,23 @@ workflow STANDARDISATION_PROFILES { Standardise and aggregate */ + // CENTRIFUGE + + // Collect and replace id for db_name for prefix + // Have to sort by size to ensure first file actually has hits otherwise + // the script fails + ch_profiles_for_centrifuge = ch_input_profiles.centrifuge + .map { [it[0]['db_name'], it[1]] } + .groupTuple(sort: {-it.size()} ) + .map { + [[id:it[0]], it[1]] + } + + KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge ) + ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt ) + ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions ) + // Kaiju // Collect and replace id for db_name for prefix @@ -60,7 +81,25 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary ) ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions ) + // Kraken2 + + // Collect and replace id for db_name for prefix + // Have to sort by size to ensure first file actually has hits otherwise + // the script fails + ch_profiles_for_kraken2 = ch_input_profiles.kraken2 + .map { [it[0]['db_name'], it[1]] } + .groupTuple(sort: {-it.size()} ) + .map { + [[id:it[0]], it[1]] + } + + KRAKENTOOLS_COMBINEKREPORTS ( ch_profiles_for_kraken2 ) + ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt ) + ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS.out.versions ) + // MetaPhlAn3 + ch_profiles_for_metaphlan3 = ch_input_profiles.metaphlan3 .map { [it[0]['db_name'], it[1]] } .groupTuple()