From ab67a1d41b63bf52fd7c147f7f8f6e8d167590b5 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 24 Sep 2021 11:01:54 +0200 Subject: [PATCH] Update fastqc to produce multi-version versions.yml (#665) * Update fastqc to produce multi-version versions.yml * Update readme and pull request template * Fix markdownlint * remove variable * Change publish dir to lowercase * Re-add getSoftwareName * Add custom pytest-workflow test to ensure versions.yml is valid * Add docstring * Remove __init__.py as it is not needed * Remove changes to README, since this part went to nf-co.re * Add NF_CORE_TEST env var * Fix editorconfig * Add additional consistency checks for versions.yml * Update multiqc module * Fix output channel --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/pytest-workflow.yml | 2 +- .gitignore | 3 ++ modules/fastqc/functions.nf | 58 ++++++++++++++++----------- modules/fastqc/main.nf | 17 +++++--- modules/fastqc/meta.yml | 2 +- modules/multiqc/functions.nf | 58 ++++++++++++++++----------- modules/multiqc/main.nf | 10 +++-- modules/multiqc/meta.yml | 2 +- tests/test_versions_yml.py | 40 ++++++++++++++++++ 10 files changed, 134 insertions(+), 60 deletions(-) create mode 100644 tests/test_versions_yml.py diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 059133d6..b9f7a4e8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -20,7 +20,7 @@ Closes #XXX - [ ] If you've added a new tool - have you followed the module conventions in the [contribution docs](https://github.com/nf-core/modules/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, include test data in your PR. - [ ] Remove all TODO statements. -- [ ] Emit the `.version.txt` file. +- [ ] Emit the `versions.yml` file. - [ ] Follow the naming conventions. - [ ] Follow the parameters requirements. - [ ] Follow the input/output options guidelines. diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index 43f48c36..9cd768a8 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -89,7 +89,7 @@ jobs: # Test the module - name: Run pytest-workflow # only use one thread for pytest-workflow to avoid race condition on conda cache. - run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof + run: NF_CORE_MODULES_TEST=1 TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof - name: Upload logs on failure if: failure() diff --git a/.gitignore b/.gitignore index 71b9b179..9d982e3f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,7 @@ output/ *.code-workspace .screenrc .*.sw? +__pycache__ +*.pyo +*.pyc tests/data/ diff --git a/modules/fastqc/functions.nf b/modules/fastqc/functions.nf index da9da093..85628ee0 100644 --- a/modules/fastqc/functions.nf +++ b/modules/fastqc/functions.nf @@ -9,6 +9,13 @@ def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + // // Function to initialise default values and to generate a Groovy Map of available options for nf-core modules // @@ -37,32 +44,35 @@ def getPathFromList(path_list) { // Function to save/publish module results // def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] } + path = path instanceof String ? path : '' + path_list.add(path) } } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" } } diff --git a/modules/fastqc/main.nf b/modules/fastqc/main.nf index 39c327b2..88bfbf5b 100644 --- a/modules/fastqc/main.nf +++ b/modules/fastqc/main.nf @@ -1,5 +1,5 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' params.options = [:] options = initOptions(params.options) @@ -24,24 +24,31 @@ process FASTQC { output: tuple val(meta), path("*.html"), emit: html tuple val(meta), path("*.zip") , emit: zip - path "*.version.txt" , emit: version + path "versions.yml" , emit: version script: // Add soft-links to original FastQs for consistent naming in pipeline - def software = getSoftwareName(task.process) def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" if (meta.single_end) { """ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz - fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS """ } else { """ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS """ } } diff --git a/modules/fastqc/meta.yml b/modules/fastqc/meta.yml index 8eb9953d..48031356 100644 --- a/modules/fastqc/meta.yml +++ b/modules/fastqc/meta.yml @@ -43,7 +43,7 @@ output: - version: type: file description: File containing software version - pattern: "*.{version.txt}" + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/multiqc/functions.nf b/modules/multiqc/functions.nf index da9da093..85628ee0 100644 --- a/modules/multiqc/functions.nf +++ b/modules/multiqc/functions.nf @@ -9,6 +9,13 @@ def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + // // Function to initialise default values and to generate a Groovy Map of available options for nf-core modules // @@ -37,32 +44,35 @@ def getPathFromList(path_list) { // Function to save/publish module results // def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] } + path = path instanceof String ? path : '' + path_list.add(path) } } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" } } diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index 8b6d6f0c..2e7ad932 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -1,5 +1,5 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' params.options = [:] options = initOptions(params.options) @@ -24,12 +24,16 @@ process MULTIQC { path "*multiqc_report.html", emit: report path "*_data" , emit: data path "*_plots" , optional:true, emit: plots - path "*.version.txt" , emit: version + path "versions.yml" , emit: version script: def software = getSoftwareName(task.process) """ multiqc -f $options.args . - multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS """ } diff --git a/modules/multiqc/meta.yml b/modules/multiqc/meta.yml index 532a8bb1..2d99ec0d 100644 --- a/modules/multiqc/meta.yml +++ b/modules/multiqc/meta.yml @@ -32,7 +32,7 @@ output: - version: type: file description: File containing software version - pattern: "*.{version.txt}" + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/tests/test_versions_yml.py b/tests/test_versions_yml.py new file mode 100644 index 00000000..5d44c7a0 --- /dev/null +++ b/tests/test_versions_yml.py @@ -0,0 +1,40 @@ +from pathlib import Path +import pytest +import yaml +import re + + +def _get_workflow_names(): + """Get all names of all workflows which have a test.yml in the tests directory. + + To do so, recursively finds all test.yml files and parses their content. + """ + here = Path(__file__).parent.resolve() + pytest_workflow_files = here.glob("**/test.yml") + for f in pytest_workflow_files: + test_config = yaml.safe_load(f.read_text()) + for workflow in test_config: + yield workflow["name"] + + +@pytest.mark.workflow(*_get_workflow_names()) +def test_ensure_valid_version_yml(workflow_dir): + workflow_dir = Path(workflow_dir) + software_name = workflow_dir.name.split("_")[0].lower() + versions_yml = (workflow_dir / f"output/{software_name}/versions.yml").read_text() + + assert ( + "END_VERSIONS" not in versions_yml + ), "END_VERSIONS detected in versions.yml. END_VERSIONS being in the text is a sign of an ill-formatted HEREDOC" + + # Raises an exception if yaml is not valid + versions = yaml.safe_load(versions_yml) + try: + software_versions = versions[software_name.upper()] + except KeyError: + raise AssertionError("There is no entry `` in versions.yml. ") + assert len(software_versions), "There must be at least one version emitted." + for tool, version in software_versions.items(): + assert re.match( + r"^\d+.*", str(version) + ), f"Version number for {tool} must start with a number. "