Update fastqc to produce multi-version versions.yml (#665)

* Update fastqc to produce multi-version versions.yml

* Update readme and pull request template

* Fix markdownlint

* remove  variable

* Change publish dir to lowercase

* Re-add getSoftwareName

* Add custom pytest-workflow test to ensure versions.yml is valid

* Add docstring

* Remove __init__.py as it is not needed

* Remove changes to README, since this part went to nf-co.re

* Add NF_CORE_TEST env var

* Fix editorconfig

* Add additional consistency checks for versions.yml

* Update multiqc module

* Fix output channel
This commit is contained in:
Gregor Sturm 2021-09-24 11:01:54 +02:00 committed by GitHub
parent ca53f7525b
commit ab67a1d41b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 134 additions and 60 deletions

View file

@ -20,7 +20,7 @@ Closes #XXX <!-- If this PR fixes an issue, please link it here! -->
- [ ] If you've added a new tool - have you followed the module conventions in the [contribution docs](https://github.com/nf-core/modules/tree/master/.github/CONTRIBUTING.md)
- [ ] If necessary, include test data in your PR.
- [ ] Remove all TODO statements.
- [ ] Emit the `<SOFTWARE>.version.txt` file.
- [ ] Emit the `versions.yml` file.
- [ ] Follow the naming conventions.
- [ ] Follow the parameters requirements.
- [ ] Follow the input/output options guidelines.

View file

@ -89,7 +89,7 @@ jobs:
# Test the module
- name: Run pytest-workflow
# only use one thread for pytest-workflow to avoid race condition on conda cache.
run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof
run: NF_CORE_MODULES_TEST=1 TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof
- name: Upload logs on failure
if: failure()

3
.gitignore vendored
View file

@ -7,4 +7,7 @@ output/
*.code-workspace
.screenrc
.*.sw?
__pycache__
*.pyo
*.pyc
tests/data/

View file

@ -9,6 +9,13 @@ def getSoftwareName(task_process) {
return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
}
//
// Extract the bare process name from $task.process, i.e. the last
// ':'-separated component (e.g. 'A:B:FASTQC' -> 'FASTQC')
//
def getProcessName(task_process) {
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}
//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
@ -37,32 +44,35 @@ def getPathFromList(path_list) {
// Function to save/publish module results
//
def saveFiles(Map args) {
if (!args.filename.endsWith('.version.txt')) {
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
// Do not publish versions.yml unless running from pytest workflow
if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
return null
}
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
}

View file

@ -1,5 +1,5 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:]
options = initOptions(params.options)
@ -24,24 +24,31 @@ process FASTQC {
output:
tuple val(meta), path("*.html"), emit: html
tuple val(meta), path("*.zip") , emit: zip
path "*.version.txt" , emit: version
path "versions.yml" , emit: version
script:
// Add soft-links to original FastQs for consistent naming in pipeline
def software = getSoftwareName(task.process)
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
if (meta.single_end) {
"""
[ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz
fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
END_VERSIONS
"""
} else {
"""
[ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
[ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
END_VERSIONS
"""
}
}

View file

@ -43,7 +43,7 @@ output:
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@grst"

View file

@ -9,6 +9,13 @@ def getSoftwareName(task_process) {
return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
}
//
// Extract the bare process name from $task.process, i.e. the last
// ':'-separated component (e.g. 'A:B:MULTIQC' -> 'MULTIQC')
//
def getProcessName(task_process) {
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}
//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
@ -37,32 +44,35 @@ def getPathFromList(path_list) {
// Function to save/publish module results
//
def saveFiles(Map args) {
if (!args.filename.endsWith('.version.txt')) {
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
// Do not publish versions.yml unless running from pytest workflow
if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
return null
}
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
}

View file

@ -1,5 +1,5 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:]
options = initOptions(params.options)
@ -24,12 +24,16 @@ process MULTIQC {
path "*multiqc_report.html", emit: report
path "*_data" , emit: data
path "*_plots" , optional:true, emit: plots
path "*.version.txt" , emit: version
path "versions.yml" , emit: version
script:
def software = getSoftwareName(task.process)
"""
multiqc -f $options.args .
multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
END_VERSIONS
"""
}

View file

@ -32,7 +32,7 @@ output:
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
pattern: "versions.yml"
authors:
- "@abhi18av"
- "@bunop"

View file

@ -0,0 +1,40 @@
from pathlib import Path
import pytest
import yaml
import re
def _get_workflow_names():
    """Yield the ``name`` of every workflow declared in a ``test.yml`` file.

    Recursively searches the directory containing this file for files named
    ``test.yml``, parses each one as YAML and yields the ``name`` entry of
    every item in the parsed document.

    Files are visited in sorted path order so that the sequence of names is
    the same on every run, regardless of filesystem ordering.

    Yields:
        The value stored under the ``"name"`` key of each workflow entry.

    Raises:
        KeyError: if a workflow entry has no ``"name"`` key.
    """
    here = Path(__file__).parent.resolve()
    for test_file in sorted(here.glob("**/test.yml")):
        # safe_load returns None for an empty document; treat that as
        # "no workflows" instead of failing with a TypeError at collection time.
        test_config = yaml.safe_load(test_file.read_text()) or []
        for workflow in test_config:
            yield workflow["name"]
@pytest.mark.workflow(*_get_workflow_names())
def test_ensure_valid_version_yml(workflow_dir):
    """Check that the ``versions.yml`` produced by a workflow is well-formed.

    The software name is derived from the first ``_``-separated token of the
    workflow directory name (lower-cased). The file is read from
    ``output/<software>/versions.yml`` inside that directory and must:

    * not contain the literal ``END_VERSIONS`` (a leftover heredoc marker),
    * parse as YAML into a mapping,
    * contain an entry keyed by the upper-cased software name,
    * list at least one tool under that entry, and
    * give every tool a version whose string form starts with a digit.

    Args:
        workflow_dir: directory of the executed workflow (presumably the
            fixture supplied by pytest-workflow -- confirm against its docs).
    """
    workflow_dir = Path(workflow_dir)
    software_name = workflow_dir.name.split("_")[0].lower()
    versions_yml = (workflow_dir / f"output/{software_name}/versions.yml").read_text()

    assert (
        "END_VERSIONS" not in versions_yml
    ), "END_VERSIONS detected in versions.yml. END_VERSIONS being in the text is a sign of an ill-formatted HEREDOC"

    # Raises an exception if yaml is not valid
    versions = yaml.safe_load(versions_yml)

    # An empty or scalar document would otherwise surface as an opaque
    # TypeError when subscripted below.
    assert isinstance(
        versions, dict
    ), "versions.yml must contain a mapping of process names to tool versions."

    try:
        software_versions = versions[software_name.upper()]
    except KeyError:
        raise AssertionError("There is no entry `<SOFTWARE>` in versions.yml. ") from None

    # A process key with no tools beneath it parses to None, not an empty dict.
    assert isinstance(software_versions, dict) and len(
        software_versions
    ), "There must be at least one version emitted."

    for tool, version in software_versions.items():
        assert re.match(
            r"^\d+.*", str(version)
        ), f"Version number for {tool} must start with a number. "