From 03de60e6fa3b047da3433b5e624d8746af4464f5 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 30 Sep 2022 15:50:47 +0200 Subject: [PATCH] Add CHECKM/QA (#2137) * Bump checkM version and add support for pre-defined database * Add markfile output to checkm/lineagewf and add checkm/qa * Remove unnecessary BAM from tests * Fix channels to ensure single meta * Blah * Use secondary name evwyhere * Remove second incovcatiojn * Don't run module directrly for join * And for the other test * Correct module name * Fix name again * Try and fix out_format 9 not being picked up * Contains to matches to allow regex * Fix tests (finally) * Fix verisoning * [automated] Fix linting with Prettier * Update modules/checkm/qa/main.nf Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Co-authored-by: nf-core-bot Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> --- modules/checkm/lineagewf/main.nf | 7 +-- modules/checkm/lineagewf/meta.yml | 4 ++ modules/checkm/qa/main.nf | 44 ++++++++++++++++ modules/checkm/qa/meta.yml | 67 +++++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/checkm/qa/main.nf | 44 ++++++++++++++++ tests/modules/checkm/qa/nextflow.config | 16 ++++++ tests/modules/checkm/qa/test.yml | 16 ++++++ 8 files changed, 199 insertions(+), 3 deletions(-) create mode 100644 modules/checkm/qa/main.nf create mode 100644 modules/checkm/qa/meta.yml create mode 100644 tests/modules/checkm/qa/main.nf create mode 100644 tests/modules/checkm/qa/nextflow.config create mode 100644 tests/modules/checkm/qa/test.yml diff --git a/modules/checkm/lineagewf/main.nf b/modules/checkm/lineagewf/main.nf index 66e2e222..db0535e1 100644 --- a/modules/checkm/lineagewf/main.nf +++ b/modules/checkm/lineagewf/main.nf @@ -13,9 +13,10 @@ process CHECKM_LINEAGEWF { path db output: - tuple val(meta), path("${prefix}") , emit: checkm_output - tuple val(meta), path("${prefix}.tsv"), emit: checkm_tsv - path "versions.yml" , emit: 
versions + tuple val(meta), path("${prefix}") , emit: checkm_output + tuple val(meta), path("${prefix}/lineage.ms"), emit: marker_file + tuple val(meta), path("${prefix}.tsv") , emit: checkm_tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/checkm/lineagewf/meta.yml b/modules/checkm/lineagewf/meta.yml index 6cfec189..4716a3e9 100644 --- a/modules/checkm/lineagewf/meta.yml +++ b/modules/checkm/lineagewf/meta.yml @@ -52,6 +52,10 @@ output: type: directory description: CheckM output directory pattern: "*/" + - marker_file: + type: file + description: Lineage marker file + pattern: "lineage.ms" - checkm_tsv: type: file description: CheckM summary completeness statistics table diff --git a/modules/checkm/qa/main.nf b/modules/checkm/qa/main.nf new file mode 100644 index 00000000..80b9747f --- /dev/null +++ b/modules/checkm/qa/main.nf @@ -0,0 +1,44 @@ +process CHECKM_QA { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::checkm-genome=1.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/checkm-genome:1.2.1--pyhdfd78af_0' : + 'quay.io/biocontainers/checkm-genome:1.2.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(analysis_dir), path(marker_file), path(coverage_file) + path exclude_marker_file + + output: + tuple val(meta), path("${prefix}.txt") , optional: true, emit: output + tuple val(meta), path("${prefix}.fasta"), optional: true, emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.args?.matches(".*-o 9.*|.*--out_format 9.*") ? "fasta" : "txt" + def coverage = coverage_file ? "--coverage_file ${coverage_file}" : "" + def exclude = exclude_marker_file ? 
"--exclude_markers ${exclude_marker_file}" : "" + """ + checkm \\ + qa \\ + --threads ${task.cpus} \\ + --file ${prefix}.${suffix} \\ + $marker_file \\ + $analysis_dir \\ + $coverage \\ + $exclude \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + checkm: \$( checkm 2>&1 | grep '...:::' | sed 's/.*CheckM v//;s/ .*//' ) + END_VERSIONS + """ +} diff --git a/modules/checkm/qa/meta.yml b/modules/checkm/qa/meta.yml new file mode 100644 index 00000000..d0af39af --- /dev/null +++ b/modules/checkm/qa/meta.yml @@ -0,0 +1,67 @@ +name: checkm_qa +description: CheckM provides a set of tools for assessing the quality of genomes recovered from isolates, single cells, or metagenomes. +keywords: + - checkm + - mag + - metagenome + - quality + - isolates + - microbes + - single cells + - completeness + - contamination + - bins + - genome bins + - qa + - quality assurance +tools: + - checkm: + description: Assess the quality of microbial genomes recovered from isolates, single cells, and metagenomes. + homepage: https://ecogenomics.github.io/CheckM/ + documentation: https://github.com/Ecogenomics/CheckM/wiki + tool_dev_url: https://github.com/Ecogenomics/CheckM + doi: "10.1101/gr.186072.114" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - analysis_dir: + type: directory + description: Directory containing output of checkm/analyze or checkm/lineage_wf etc. + pattern: "*" + - marker_file: + type: file + description: Marker file specified during checkm/analyze or produced by checkm/{lineage,taxonomy}_wf + pattern: "*.ms" + - coverage_file: + type: file + description: File containing coverage of each sequence (generated by checkm coverage) + - exclude_marker_file: + type: file + description: File specifying markers to exclude from marker sets + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: "Default completeness statistics in various formats, as specified with --out_format (excluding option: 9)" + pattern: "*.txt" + - fasta: + type: file + description: Output in fasta format (only if --out_format 9) + pattern: "*.fasta" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index cae10d10..d1fbe0d8 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -499,6 +499,10 @@ checkm/lineagewf: - modules/checkm/lineagewf/** - tests/modules/checkm/lineagewf/** +checkm/qa: + - modules/checkm/qa/** + - tests/modules/checkm/qa/** + chromap/chromap: - modules/chromap/chromap/** - tests/modules/chromap/chromap/** diff --git a/tests/modules/checkm/qa/main.nf b/tests/modules/checkm/qa/main.nf new file mode 100644 index 00000000..2be2dd67 --- /dev/null +++ b/tests/modules/checkm/qa/main.nf @@ -0,0 +1,44 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { CHECKM_LINEAGEWF } from '../../../../modules/checkm/lineagewf/main.nf' +include { CHECKM_LINEAGEWF as CHECKM_LINEAGEWF_FASTA } from '../../../../modules/checkm/lineagewf/main.nf' +include { CHECKM_QA } from '../../../../modules/checkm/qa/main.nf' +include { CHECKM_QA as CHECKM_QA_FASTA } from '../../../../modules/checkm/qa/main.nf' + +workflow test_checkm_qa { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] + fasta_ext = 'fasta' + + CHECKM_LINEAGEWF ( input, fasta_ext, [] ) + + ch_checkmqa_input = CHECKM_LINEAGEWF.out.checkm_output + .join(CHECKM_LINEAGEWF.out.marker_file) + .map{ + meta, dir, marker -> + [ meta, dir, marker, []] + } + + CHECKM_QA ( ch_checkmqa_input, [] ) +} + +workflow test_checkm_qa_fasta { + + input = [ [ id:'test', single_end:false 
], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] + fasta_ext = 'fasta' + + CHECKM_LINEAGEWF_FASTA ( input, fasta_ext, [] ) + + ch_checkmqa_input = CHECKM_LINEAGEWF_FASTA.out.checkm_output + .join(CHECKM_LINEAGEWF_FASTA.out.marker_file) + .map{ + meta, dir, marker -> + [ meta, dir, marker, []] + } + + CHECKM_QA_FASTA ( ch_checkmqa_input, [] ) +} diff --git a/tests/modules/checkm/qa/nextflow.config b/tests/modules/checkm/qa/nextflow.config new file mode 100644 index 00000000..0f5e524b --- /dev/null +++ b/tests/modules/checkm/qa/nextflow.config @@ -0,0 +1,16 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: CHECKM_QA { + ext.prefix = { "${meta.id}.qa" } + } + + + withName: CHECKM_QA_FASTA { + ext.args = "-o 9" + ext.prefix = { "${meta.id}.qa" } + } + + +} diff --git a/tests/modules/checkm/qa/test.yml b/tests/modules/checkm/qa/test.yml new file mode 100644 index 00000000..0c3fa4c8 --- /dev/null +++ b/tests/modules/checkm/qa/test.yml @@ -0,0 +1,16 @@ +- name: checkm qa test_checkm_qa + command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config + tags: + - checkm/qa + - checkm + files: + - path: output/checkm/test.qa.txt + md5sum: 645f4282569afb4b171396732b2d2582 + +- name: checkm qa test_checkm_qa_fasta + command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa_fasta -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config + tags: + - checkm/qa + - checkm + files: + - path: output/checkm/test.qa.fasta