Update fastqc to produce multi-version versions.yml (#665)

* Update fastqc to produce multi-version versions.yml

* Update readme and pull request template

* Fix markdownlint

* Remove `software` variable

* Change publish dir to lowercase

* Re-add getSoftwareName

* Add custom pytest-workflow test to ensure versions.yml is valid

* Add docstring

* Remove __init__.py as it is not needed

* Remove changes to README, since this part went to nf-co.re

* Add NF_CORE_TEST env var

* Fix editorconfig

* Add additional consistency checks for versions.yml

* Update multiqc module

* Fix output channel
This commit is contained in:
Gregor Sturm 2021-09-24 11:01:54 +02:00 committed by GitHub
parent ca53f7525b
commit ab67a1d41b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 134 additions and 60 deletions

View file

@ -20,7 +20,7 @@ Closes #XXX <!-- If this PR fixes an issue, please link it here! -->
- [ ] If you've added a new tool - have you followed the module conventions in the [contribution docs](https://github.com/nf-core/modules/tree/master/.github/CONTRIBUTING.md) - [ ] If you've added a new tool - have you followed the module conventions in the [contribution docs](https://github.com/nf-core/modules/tree/master/.github/CONTRIBUTING.md)
- [ ] If necessary, include test data in your PR. - [ ] If necessary, include test data in your PR.
- [ ] Remove all TODO statements. - [ ] Remove all TODO statements.
- [ ] Emit the `<SOFTWARE>.version.txt` file. - [ ] Emit the `versions.yml` file.
- [ ] Follow the naming conventions. - [ ] Follow the naming conventions.
- [ ] Follow the parameters requirements. - [ ] Follow the parameters requirements.
- [ ] Follow the input/output options guidelines. - [ ] Follow the input/output options guidelines.

View file

@ -89,7 +89,7 @@ jobs:
# Test the module # Test the module
- name: Run pytest-workflow - name: Run pytest-workflow
# only use one thread for pytest-workflow to avoid race condition on conda cache. # only use one thread for pytest-workflow to avoid race condition on conda cache.
run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof run: NF_CORE_MODULES_TEST=1 TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof
- name: Upload logs on failure - name: Upload logs on failure
if: failure() if: failure()

3
.gitignore vendored
View file

@ -7,4 +7,7 @@ output/
*.code-workspace *.code-workspace
.screenrc .screenrc
.*.sw? .*.sw?
__pycache__
*.pyo
*.pyc
tests/data/ tests/data/

View file

@ -9,6 +9,13 @@ def getSoftwareName(task_process) {
return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
} }
//
// Return the last ':'-separated component of $task.process,
// e.g. 'WORKFLOW:SUBWORKFLOW:FASTQC' -> 'FASTQC'.
// Unlike getSoftwareName, the result is neither split on '_' nor lower-cased.
//
def getProcessName(task_process) {
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}
// //
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules // Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
// //
@ -37,9 +44,13 @@ def getPathFromList(path_list) {
// Function to save/publish module results // Function to save/publish module results
// //
def saveFiles(Map args) { def saveFiles(Map args) {
if (!args.filename.endsWith('.version.txt')) {
def ioptions = initOptions(args.options) def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ] def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
// Do not publish versions.yml unless running from pytest workflow
if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
return null
}
if (ioptions.publish_by_meta) { if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) { for (key in key_list) {
@ -65,4 +76,3 @@ def saveFiles(Map args) {
return "${getPathFromList(path_list)}/$args.filename" return "${getPathFromList(path_list)}/$args.filename"
} }
} }
}

View file

@ -1,5 +1,5 @@
// Import generic module functions // Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions' include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:] params.options = [:]
options = initOptions(params.options) options = initOptions(params.options)
@ -24,24 +24,31 @@ process FASTQC {
output: output:
tuple val(meta), path("*.html"), emit: html tuple val(meta), path("*.html"), emit: html
tuple val(meta), path("*.zip") , emit: zip tuple val(meta), path("*.zip") , emit: zip
path "*.version.txt" , emit: version path "versions.yml" , emit: version
script: script:
// Add soft-links to original FastQs for consistent naming in pipeline // Add soft-links to original FastQs for consistent naming in pipeline
def software = getSoftwareName(task.process)
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
if (meta.single_end) { if (meta.single_end) {
""" """
[ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz
fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
END_VERSIONS
""" """
} else { } else {
""" """
[ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
[ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
END_VERSIONS
""" """
} }
} }

View file

@ -43,7 +43,7 @@ output:
- version: - version:
type: file type: file
description: File containing software version description: File containing software version
pattern: "*.{version.txt}" pattern: "versions.yml"
authors: authors:
- "@drpatelh" - "@drpatelh"
- "@grst" - "@grst"

View file

@ -9,6 +9,13 @@ def getSoftwareName(task_process) {
return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
} }
//
// Return the last ':'-separated component of $task.process,
// e.g. 'WORKFLOW:SUBWORKFLOW:MULTIQC' -> 'MULTIQC'.
// Unlike getSoftwareName, the result is neither split on '_' nor lower-cased.
//
def getProcessName(task_process) {
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}
// //
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules // Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
// //
@ -37,9 +44,13 @@ def getPathFromList(path_list) {
// Function to save/publish module results // Function to save/publish module results
// //
def saveFiles(Map args) { def saveFiles(Map args) {
if (!args.filename.endsWith('.version.txt')) {
def ioptions = initOptions(args.options) def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ] def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
// Do not publish versions.yml unless running from pytest workflow
if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
return null
}
if (ioptions.publish_by_meta) { if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) { for (key in key_list) {
@ -65,4 +76,3 @@ def saveFiles(Map args) {
return "${getPathFromList(path_list)}/$args.filename" return "${getPathFromList(path_list)}/$args.filename"
} }
} }
}

View file

@ -1,5 +1,5 @@
// Import generic module functions // Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions' include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:] params.options = [:]
options = initOptions(params.options) options = initOptions(params.options)
@ -24,12 +24,16 @@ process MULTIQC {
path "*multiqc_report.html", emit: report path "*multiqc_report.html", emit: report
path "*_data" , emit: data path "*_data" , emit: data
path "*_plots" , optional:true, emit: plots path "*_plots" , optional:true, emit: plots
path "*.version.txt" , emit: version path "versions.yml" , emit: version
script: script:
def software = getSoftwareName(task.process) def software = getSoftwareName(task.process)
""" """
multiqc -f $options.args . multiqc -f $options.args .
multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
END_VERSIONS
""" """
} }

View file

@ -32,7 +32,7 @@ output:
- version: - version:
type: file type: file
description: File containing software version description: File containing software version
pattern: "*.{version.txt}" pattern: "versions.yml"
authors: authors:
- "@abhi18av" - "@abhi18av"
- "@bunop" - "@bunop"

View file

@ -0,0 +1,40 @@
from pathlib import Path
import pytest
import yaml
import re
def _get_workflow_names():
    """Yield the name of every workflow declared in a ``test.yml`` file.

    Recursively finds all ``test.yml`` files below the directory that contains
    this file, parses each one as YAML and yields the ``name`` entry of every
    workflow definition it lists.

    Yields:
        The value stored under the ``name`` key of each workflow entry.
    """
    here = Path(__file__).parent.resolve()
    for test_yml in here.glob("**/test.yml"):
        # yaml.safe_load returns None for an empty document; treat such a file
        # as declaring no workflows instead of failing with a TypeError while
        # the decorator arguments are being evaluated.
        test_config = yaml.safe_load(test_yml.read_text()) or []
        for workflow in test_config:
            yield workflow["name"]
@pytest.mark.workflow(*_get_workflow_names())
def test_ensure_valid_version_yml(workflow_dir):
    """Check that the ``versions.yml`` published by a workflow is well-formed.

    The software name is derived from the part of the workflow directory name
    before the first underscore. The file is read from
    ``output/<software>/versions.yml`` inside ``workflow_dir`` and must:

    * not contain the literal ``END_VERSIONS`` (left-over HEREDOC delimiter),
    * be valid YAML holding a mapping,
    * have an entry keyed by the upper-cased software name,
    * list at least one tool under that entry, and
    * report a version starting with a digit for every tool.
    """
    workflow_dir = Path(workflow_dir)
    software_name = workflow_dir.name.split("_")[0].lower()
    versions_yml = (workflow_dir / f"output/{software_name}/versions.yml").read_text()

    assert (
        "END_VERSIONS" not in versions_yml
    ), "END_VERSIONS detected in versions.yml. END_VERSIONS being in the text is a sign of an ill-formatted HEREDOC"

    # Raises an exception if yaml is not valid
    versions = yaml.safe_load(versions_yml)

    # An empty file parses to None and a scalar-only file to a str/number;
    # report both as assertion failures rather than an opaque TypeError.
    assert isinstance(
        versions, dict
    ), "versions.yml must contain a mapping from process name to tool versions."

    assert (
        software_name.upper() in versions
    ), "There is no entry `<SOFTWARE>` in versions.yml. "
    software_versions = versions[software_name.upper()]

    # A process key without any tool below it parses to None.
    assert (
        isinstance(software_versions, dict) and software_versions
    ), "There must be at least one version emitted."

    for tool, version in software_versions.items():
        assert re.match(
            r"^\d+.*", str(version)
        ), f"Version number for {tool} must start with a number. "