Add CHECKM/QA (#2137)

* Bump checkM version and add support for pre-defined database

* Add markfile output to checkm/lineagewf and add checkm/qa

* Remove unnecessary BAM from tests

* Fix channels to ensure single meta

* Blah

* Use secondary name everywhere

* Remove second invocation

* Don't run module directly for join

* And for the other test

* Correct module name

* Fix name again

* Try and fix out_format 9 not being picked up

* Contains to matches to allow regex

* Fix tests (finally)

* Fix versioning

* [automated] Fix linting with Prettier

* Update modules/checkm/qa/main.nf

Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com>

Co-authored-by: nf-core-bot <core@nf-co.re>
Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com>
This commit is contained in:
James A. Fellows Yates 2022-09-30 15:50:47 +02:00 committed by GitHub
parent 31c0b49f65
commit 03de60e6fa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 199 additions and 3 deletions

View file

@ -14,6 +14,7 @@ process CHECKM_LINEAGEWF {
output:
tuple val(meta), path("${prefix}") , emit: checkm_output
tuple val(meta), path("${prefix}/lineage.ms"), emit: marker_file
tuple val(meta), path("${prefix}.tsv") , emit: checkm_tsv
path "versions.yml" , emit: versions

View file

@ -52,6 +52,10 @@ output:
type: directory
description: CheckM output directory
pattern: "*/"
- marker_file:
type: file
description: Lineage marker file
pattern: "lineage.ms"
- checkm_tsv:
type: file
description: CheckM summary completeness statistics table

44
modules/checkm/qa/main.nf Normal file
View file

@ -0,0 +1,44 @@
// Runs `checkm qa` on an existing CheckM analysis directory and its marker file.
//
// Inputs:
//   meta                - sample metadata map; meta.id is used for the tag and default prefix
//   analysis_dir        - directory passed to `checkm qa` as the analysis directory
//   marker_file         - marker file passed to `checkm qa`
//   coverage_file       - optional; pass [] to omit `--coverage_file`
//   exclude_marker_file - optional; pass [] to omit `--exclude_markers`
//
// Outputs (both optional, only one is produced per run):
//   output - ${prefix}.txt, written unless output format 9 is requested
//   fasta  - ${prefix}.fasta, written when ext.args requests output format 9
//   versions - versions.yml with the CheckM version
process CHECKM_QA {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::checkm-genome=1.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm-genome:1.2.1--pyhdfd78af_0' :
        'quay.io/biocontainers/checkm-genome:1.2.1--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(analysis_dir), path(marker_file), path(coverage_file)
    path exclude_marker_file

    output:
    tuple val(meta), path("${prefix}.txt")  , optional: true, emit: output
    tuple val(meta), path("${prefix}.fasta"), optional: true, emit: fasta
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args     = task.ext.args ?: ''
    // `prefix` is intentionally not declared with `def`: the output block reads it.
    prefix       = task.ext.prefix ?: "${meta.id}"
    // Output format 9 is FASTA, so the result file gets a .fasta extension.
    // The option is spelled `-o` / `--out_format`; accept either form, with a
    // space or '=' before the value.
    def suffix   = args.matches('(?s).*(-o|--out_format)[ =]9.*') ? "fasta" : "txt"
    def coverage = coverage_file ? "--coverage_file ${coverage_file}" : ""
    // Must interpolate the exclude file input itself (not the marker file).
    def exclude  = exclude_marker_file ? "--exclude_markers ${exclude_marker_file}" : ""
    """
    checkm \\
        qa \\
        --threads ${task.cpus} \\
        --file ${prefix}.${suffix} \\
        $marker_file \\
        $analysis_dir \\
        $coverage \\
        $exclude \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        checkm: \$( checkm 2>&1 | grep '...:::' | sed 's/.*CheckM v//;s/ .*//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,67 @@
name: checkm_qa
description: CheckM provides a set of tools for assessing the quality of genomes recovered from isolates, single cells, or metagenomes.
keywords:
- checkm
- mag
- metagenome
- quality
- isolates
- microbes
- single cells
- completeness
- contamination
- bins
- genome bins
- qa
- quality assurance
tools:
- checkm:
description: Assess the quality of microbial genomes recovered from isolates, single cells, and metagenomes.
homepage: https://ecogenomics.github.io/CheckM/
documentation: https://github.com/Ecogenomics/CheckM/wiki
tool_dev_url: https://github.com/Ecogenomics/CheckM
doi: "10.1101/gr.186072.114"
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- analysis_dir:
type: directory
description: Directory containing output of checkm/analyze or checkm/lineage_wf etc.
pattern: "*"
- marker_file:
type: file
description: Marker file specified during checkm/analyze or produced by checkm/{lineage,taxonomy}_wf
pattern: "*.ms"
- coverage_file:
type: file
description: File containing coverage of each sequence (generated by checkm coverage)
- exclude_marker_file:
type: file
description: File specifying markers to exclude from marker sets
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- output:
type: file
description: "Default completeness statistics in various formats, as specified with --out_format (excluding option: 9)"
pattern: "*.txt"
- fasta:
type: file
description: Output in fasta format (only if --out_format 9)
pattern: "*.fasta"
authors:
- "@jfy133"

View file

@ -499,6 +499,10 @@ checkm/lineagewf:
- modules/checkm/lineagewf/**
- tests/modules/checkm/lineagewf/**
checkm/qa:
- modules/checkm/qa/**
- tests/modules/checkm/qa/**
chromap/chromap:
- modules/chromap/chromap/**
- tests/modules/chromap/chromap/**

View file

@ -0,0 +1,44 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CHECKM_LINEAGEWF } from '../../../../modules/checkm/lineagewf/main.nf'
include { CHECKM_LINEAGEWF as CHECKM_LINEAGEWF_FASTA } from '../../../../modules/checkm/lineagewf/main.nf'
include { CHECKM_QA } from '../../../../modules/checkm/qa/main.nf'
include { CHECKM_QA as CHECKM_QA_FASTA } from '../../../../modules/checkm/qa/main.nf'
// Test entry point: run CHECKM_LINEAGEWF on the SARS-CoV-2 contigs, then feed
// its output directory and marker file into CHECKM_QA.
workflow test_checkm_qa {

    sample_meta = [ id:'test', single_end:false ] // meta map
    contigs     = file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)

    // Third argument is left empty (presumably the optional pre-defined database; confirm against the lineagewf module).
    CHECKM_LINEAGEWF ( [ sample_meta, contigs ], 'fasta', [] )

    // Pair the output directory with the marker file, then append an empty
    // placeholder for the coverage file slot of the CHECKM_QA input tuple.
    qa_input = CHECKM_LINEAGEWF.out.checkm_output
        .join( CHECKM_LINEAGEWF.out.marker_file )
        .map { meta, dir, marker -> [ meta, dir, marker, [] ] }

    // Second argument: no exclude-marker file.
    CHECKM_QA ( qa_input, [] )
}
// Test entry point for the aliased processes: run CHECKM_LINEAGEWF_FASTA on the
// SARS-CoV-2 contigs, then feed its output directory and marker file into
// CHECKM_QA_FASTA.
workflow test_checkm_qa_fasta {

    sample_meta = [ id:'test', single_end:false ] // meta map
    contigs     = file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)

    // Third argument is left empty (presumably the optional pre-defined database; confirm against the lineagewf module).
    CHECKM_LINEAGEWF_FASTA ( [ sample_meta, contigs ], 'fasta', [] )

    // Pair the output directory with the marker file, then append an empty
    // placeholder for the coverage file slot of the CHECKM_QA input tuple.
    qa_input = CHECKM_LINEAGEWF_FASTA.out.checkm_output
        .join( CHECKM_LINEAGEWF_FASTA.out.marker_file )
        .map { meta, dir, marker -> [ meta, dir, marker, [] ] }

    // Second argument: no exclude-marker file.
    CHECKM_QA_FASTA ( qa_input, [] )
}

View file

@ -0,0 +1,16 @@
// Process-scope settings for the checkm/qa module tests.
process {
// Publish directory: take the last ':'-separated component of the process
// name, keep the part before the first '_', and lowercase it
// (e.g. CHECKM_QA and CHECKM_QA_FASTA both resolve to "checkm").
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// Default run: output file is named "<meta.id>.qa.<ext>".
withName: CHECKM_QA {
ext.prefix = { "${meta.id}.qa" }
}
// Aliased run: pass "-o 9" to checkm qa; same prefix as above.
withName: CHECKM_QA_FASTA {
ext.args = "-o 9"
ext.prefix = { "${meta.id}.qa" }
}
}

View file

@ -0,0 +1,16 @@
- name: checkm qa test_checkm_qa
command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config
tags:
- checkm/qa
- checkm
files:
- path: output/checkm/test.qa.txt
md5sum: 645f4282569afb4b171396732b2d2582
- name: checkm qa test_checkm_qa_fasta
command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa_fasta -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config
tags:
- checkm/qa
- checkm
files:
- path: output/checkm/test.qa.fasta