From ea41a8a6f761b9993d857570e872abaae3fea555 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Fri, 18 Mar 2022 15:39:27 +0100
Subject: [PATCH] Add centrifuge module (#1410)

* Add centrifuge module

* Add centrifuge module

* Add centrifuge module

* Add centrifuge module
---
Review note: the main.nf, meta.yml and nextflow.config hunks were amended after
the original commit, so the blob ids on their "index" lines are stale.

 modules/centrifuge/main.nf               | 69 +++++++++++++++++++++
 modules/centrifuge/meta.yml              | 80 ++++++++++++++++++++++++
 tests/config/pytest_modules.yml          | 12 ++--
 tests/modules/centrifuge/main.nf         | 33 ++++++++++
 tests/modules/centrifuge/nextflow.config |  5 ++
 tests/modules/centrifuge/test.yml        | 22 +++++++
 6 files changed, 217 insertions(+), 4 deletions(-)
 create mode 100644 modules/centrifuge/main.nf
 create mode 100644 modules/centrifuge/meta.yml
 create mode 100644 tests/modules/centrifuge/main.nf
 create mode 100644 tests/modules/centrifuge/nextflow.config
 create mode 100644 tests/modules/centrifuge/test.yml

diff --git a/modules/centrifuge/main.nf b/modules/centrifuge/main.nf
new file mode 100644
index 00000000..7eb566da
--- /dev/null
+++ b/modules/centrifuge/main.nf
@@ -0,0 +1,69 @@
+// Runs `centrifuge` on single- or paired-end reads against a database archive,
+// then derives a Kraken-style report from the results with `centrifuge-kreport`.
+process CENTRIFUGE {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
+        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
+
+    input:
+    tuple val(meta), path(reads) // meta.id is the default output prefix; meta.single_end selects -U vs. -1/-2
+    path db                      // archive that the script unpacks with `tar -xf`
+    val save_unaligned           // truthy: pass --un-gz (single-end) or --un-conc-gz (paired-end)
+    val save_aligned             // truthy: pass --al-gz (single-end) or --al-conc-gz (paired-end)
+    val sam_format               // truthy: pass --out-fmt 'sam'
+
+    output:
+    tuple val(meta), path('*report.txt')                 , emit: report
+    tuple val(meta), path('*results.txt')                , emit: results
+    tuple val(meta), path('*kreport.txt')                , emit: kreport
+    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
+    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
+    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
+    path "versions.yml"                                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
+    // The index prefix handed to -x is the archive name without ".tar.gz";
+    // assumes the files inside the archive use that same basename -- TODO confirm.
+    def db_name = db.toString().replace(".tar.gz","")
+    def unaligned = ''
+    def aligned = ''
+    if (meta.single_end) {
+        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
+        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
+    } else {
+        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
+        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
+    }
+    // NOTE(review): the -S target below stays <prefix>.results.txt when SAM is
+    // requested, so it is unclear what produces the optional '*.sam' output -- confirm.
+    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
+    """
+    tar -xf $db
+    centrifuge \\
+        -x $db_name \\
+        -p $task.cpus \\
+        $paired \\
+        --report-file ${prefix}.report.txt \\
+        -S ${prefix}.results.txt \\
+        $unaligned \\
+        $aligned \\
+        $sam_output \\
+        $args
+    centrifuge-kreport -x $db_name ${prefix}.results.txt > ${prefix}.kreport.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/centrifuge/meta.yml b/modules/centrifuge/meta.yml
new file mode 100644
index 00000000..3adf0e23
--- /dev/null
+++ b/modules/centrifuge/meta.yml
@@ -0,0 +1,80 @@
+name: centrifuge
+description: Classifies metagenomic sequence data
+keywords:
+  - classify
+  - metagenomics
+  - fastq
+  - db
+tools:
+  - centrifuge:
+      description: Centrifuge is a classifier for metagenomic sequences.
+      homepage: https://ccb.jhu.edu/software/centrifuge/
+      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
+      doi: 10.1101/gr.210641.116
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - db:
+      type: file
+      description: Centrifuge database in .tar.gz format
+      pattern: "*.tar.gz"
+  - save_unaligned:
+      type: value
+      description: If true unmapped fastq files are saved
+  - save_aligned:
+      type: value
+      description: If true mapped fastq files are saved
+  - sam_format:
+      type: value
+      description: If true the --out-fmt 'sam' option is passed to centrifuge
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - report:
+      type: file
+      description: |
+        File containing a classification summary
+      pattern: "*report.txt"
+  - results:
+      type: file
+      description: |
+        File containing classification results
+      pattern: "*results.txt"
+  - kreport:
+      type: file
+      description: |
+        File containing kraken-style report from centrifuge
+        out files.
+      pattern: "*kreport.txt"
+  - sam:
+      type: file
+      description: Optional alignment output in SAM format
+      pattern: "*.sam"
+  - fastq_unmapped:
+      type: file
+      description: Unmapped fastq files
+      pattern: "*.unmapped.fastq{,.1,.2}.gz"
+  - fastq_mapped:
+      type: file
+      description: Mapped fastq files
+      pattern: "*.mapped.fastq{,.1,.2}.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@sofstam"
+  - "@jfy133"
+  - "@sateeshperi"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 48c3bb7d..ea17ce2e 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -202,6 +202,10 @@ bedtools/subtract:
   - modules/bedtools/subtract/**
   - tests/modules/bedtools/subtract/**
 
+biobambam/bammarkduplicates2:
+  - modules/biobambam/bammarkduplicates2/**
+  - tests/modules/biobambam/bammarkduplicates2/**
+
 biscuit/align:
   - modules/biscuit/index/**
   - modules/biscuit/align/**
@@ -245,10 +249,6 @@ biscuit/vcf2bed:
   - modules/biscuit/vcf2bed/**
   - tests/modules/biscuit/vcf2bed/**
 
-biobambam/bammarkduplicates2:
-  - modules/biobambam/bammarkduplicates2/**
-  - tests/modules/biobambam/bammarkduplicates2/**
-
 bismark/align:
   - modules/bismark/align/**
   - modules/bismark/genomepreparation/**
@@ -379,6 +379,10 @@ cellranger/mkref:
   - modules/cellranger/gtf/**
   - tests/modules/cellranger/gtf/**
 
+centrifuge:
+  - modules/centrifuge/**
+  - tests/modules/centrifuge/**
+
 checkm/lineagewf:
   - modules/checkm/lineagewf/**
   - tests/modules/checkm/lineagewf/**
diff --git a/tests/modules/centrifuge/main.nf b/tests/modules/centrifuge/main.nf
new file mode 100644
index 00000000..a8eb2fcb
--- /dev/null
+++ b/tests/modules/centrifuge/main.nf
@@ -0,0 +1,33 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { CENTRIFUGE } from '../../../modules/centrifuge/main.nf'
+
+workflow test_centrifuge_single_end {
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+    db = file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz", checkIfExists: true)
+    save_unaligned = true
+    save_aligned = false
+    sam_format = false
+
+    CENTRIFUGE ( input, db, save_unaligned, save_aligned, sam_format )
+
+}
+
+workflow test_centrifuge_paired_end {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+            ]
+    db = file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz", checkIfExists: true)
+    save_unaligned = true
+    save_aligned = false
+    sam_format = false
+
+    CENTRIFUGE ( input, db, save_unaligned, save_aligned, sam_format )
+
+
+}
diff --git a/tests/modules/centrifuge/nextflow.config b/tests/modules/centrifuge/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/centrifuge/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/centrifuge/test.yml b/tests/modules/centrifuge/test.yml
new file mode 100644
index 00000000..a7b4360b
--- /dev/null
+++ b/tests/modules/centrifuge/test.yml
@@ -0,0 +1,22 @@
+- name: centrifuge test_centrifuge_single_end
+  command: nextflow run tests/modules/centrifuge -entry test_centrifuge_single_end -c tests/config/nextflow.config
+  tags:
+    - centrifuge
+  files:
+    - path: output/centrifuge/test.kreport.txt
+    - path: output/centrifuge/test.report.txt
+    - path: output/centrifuge/test.results.txt
+    - path: output/centrifuge/test.unmapped.fastq.gz
+    - path: output/centrifuge/versions.yml
+
+- name: centrifuge test_centrifuge_paired_end
+  command: nextflow run tests/modules/centrifuge -entry test_centrifuge_paired_end -c tests/config/nextflow.config
+  tags:
+    - centrifuge
+  files:
+    - path: output/centrifuge/test.kreport.txt
+    - path: output/centrifuge/test.report.txt
+    - path: output/centrifuge/test.results.txt
+    - path: output/centrifuge/test.unmapped.fastq.1.gz
+    - path: output/centrifuge/test.unmapped.fastq.2.gz
+    - path: output/centrifuge/versions.yml