From b412e6dabc99ea1b1bae07e09a4dbcb6e92e5439 Mon Sep 17 00:00:00 2001 From: Priyanka Surana Date: Thu, 27 Jan 2022 14:36:18 +0000 Subject: [PATCH 01/29] Busco module commit with nf-core v2.2 c1 --- modules/busco/main.nf | 42 +++++++++++++++++++++++ modules/busco/meta.yml | 52 +++++++++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 +++ tests/modules/busco/main.nf | 26 +++++++++++++++ tests/modules/busco/nextflow.config | 5 +++ tests/modules/busco/test.yml | 20 +++++++++++ 6 files changed, 149 insertions(+) create mode 100644 modules/busco/main.nf create mode 100644 modules/busco/meta.yml create mode 100644 tests/modules/busco/main.nf create mode 100644 tests/modules/busco/nextflow.config create mode 100644 tests/modules/busco/test.yml diff --git a/modules/busco/main.nf b/modules/busco/main.nf new file mode 100644 index 00000000..445f8ed4 --- /dev/null +++ b/modules/busco/main.nf @@ -0,0 +1,42 @@ +process BUSCO { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::busco=5.2.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/busco:5.2.2--pyhdfd78af_0': + 'quay.io/biocontainers/busco:5.2.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path(augustus_config) + val(lineage) + + output: + tuple val(meta), path("${meta.id}/run_*/full_table.tsv"), emit: tsv + tuple val(meta), path("${meta.id}/run_*/short_summary.txt"), emit: txt + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (lineage) args += " --lineage_dataset $lineage" + """ + # Ensure the input is uncompressed + gzip -cdf $fasta > __UNCOMPRESSED_FASTA_FILE__ + # Copy the image's AUGUSTUS config directory if it was not provided to the module + [ ! 
-e augustus_config ] && cp -a /usr/local/config augustus_config + AUGUSTUS_CONFIG_PATH=augustus_config \\ + busco \\ + $args \\ + --augustus \\ + --cpu $task.cpus \\ + --in __UNCOMPRESSED_FASTA_FILE__ \\ + --out $meta.id + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ +} diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml new file mode 100644 index 00000000..0cd9e402 --- /dev/null +++ b/modules/busco/meta.yml @@ -0,0 +1,52 @@ +name: busco +description: Benchmarking Universal Single Copy Orthologs +keywords: + - quality control + - genome + - transcriptome + - proteome +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleic or amino acid sequence file in FASTA format + pattern: "*.{fasta}" + - augustus_config: + type: directory + description: AUGUSTUS config directory + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: Full summary table + pattern: "*.{tsv}" + - txt: + type: file + description: Short summary text + pattern: "*.{txt}" + +authors: + - "@priyanka-surana" + - "@charles-plessy" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 2321c918..920b85bd 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -230,6 +230,10 @@ bowtie2/build: - modules/bowtie2/build/** - tests/modules/bowtie2/build_test/** +busco: + - modules/busco/** + - tests/modules/busco/** + bwa/aln: - modules/bwa/aln/** - tests/modules/bwa/aln/** diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf new file mode 100644 index 00000000..28a4921c --- /dev/null +++ b/tests/modules/busco/main.nf @@ -0,0 +1,26 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BUSCO as BUSCO_BACTE } from '../../../modules/busco/main.nf' +include { BUSCO as BUSCO_CHR22 } from '../../../modules/busco/main.nf' +include { UNTAR } from '../../../modules/untar/main.nf' + +// This tests genome decompression, empty input channels and data download +workflow test_busco_bacteroidales { + input = [ [ id:'test' ], file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) ] + BUSCO_BACTE ( input, + [], + [] ) +} + +// This tests uncompressed genome, BUSCO lineage file provided via input channel, and offline mode +workflow test_busco_chr22 { + input = [ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + lineage_dataset = [ file(params.test_data['homo_sapiens']['genome']['chr22_odb10_tar_gz'], checkIfExists: true) ] + UNTAR(lineage_dataset) + BUSCO_CHR22 ( input, + [], + UNTAR.out.untar ) +} + diff --git a/tests/modules/busco/nextflow.config b/tests/modules/busco/nextflow.config new file 
mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/busco/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml new file mode 100644 index 00000000..32553d2e --- /dev/null +++ b/tests/modules/busco/test.yml @@ -0,0 +1,20 @@ +- name: busco test_busco_bacteroidales + command: nextflow run ./tests/modules/busco -entry test_busco_bacteroidales -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config + tags: + - busco + files: + - path: output/busco/test/run_bacteroidales_odb10/full_table.tsv + md5sum: 8d7b401d875ecd9291b01bf4485bf080 + - path: output/busco/test/run_bacteroidales_odb10/short_summary.txt + contains: ['Complete BUSCOs (C)'] + +- name: busco test_busco_chr22 + command: nextflow run ./tests/modules/busco -entry test_busco_chr22 -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config + tags: + - busco + files: + - path: output/busco/test/run_chr22_odb10/full_table.tsv + md5sum: 83f20e8996c591338ada73b6ab0eb269 + - path: output/busco/test/run_chr22_odb10/short_summary.txt + contains: ['Complete BUSCOs (C)'] + From 3e6be5060007d3feb24e3dfb95d602a3fbd3eb58 Mon Sep 17 00:00:00 2001 From: Priyanka Surana Date: Fri, 25 Feb 2022 13:26:23 +0000 Subject: [PATCH 02/29] Adjusted Augustus Config Path settings. 
Suggested by @mahesh-panchal --- modules/busco/main.nf | 26 +++++++++++++++++++------- modules/busco/meta.yml | 3 +++ tests/modules/busco/main.nf | 15 ++------------- tests/modules/busco/test.yml | 14 ++------------ 4 files changed, 26 insertions(+), 32 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 445f8ed4..6e526538 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -9,6 +9,7 @@ process BUSCO { input: tuple val(meta), path(fasta) + val(mode) path(augustus_config) val(lineage) @@ -24,15 +25,26 @@ process BUSCO { """ # Ensure the input is uncompressed gzip -cdf $fasta > __UNCOMPRESSED_FASTA_FILE__ + + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) + # Check for container variable initialisation script and source it. + if [ -f "/usr/local/env-activate.sh" ]; then + # . "/usr/local/env-activate.sh" # Errors out because of various unbound variables + export PATH='/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin' + export CONDA_PREFIX='/usr/local' + export CONDA_SHLVL='1' + export CONDA_DEFAULT_ENV='/usr/local' + export CONDA_PROMPT_MODIFIER='' + . "/usr/local/etc/conda/activate.d/activate-r-base.sh" + . "/usr/local/etc/conda/activate.d/augustus.sh" + . "/usr/local/etc/conda/activate.d/openjdk_activate.sh" + fi + # Copy the image's AUGUSTUS config directory if it was not provided to the module [ ! 
-e augustus_config ] && cp -a /usr/local/config augustus_config - AUGUSTUS_CONFIG_PATH=augustus_config \\ - busco \\ - $args \\ - --augustus \\ - --cpu $task.cpus \\ - --in __UNCOMPRESSED_FASTA_FILE__ \\ - --out $meta.id + + # Busco command + AUGUSTUS_CONFIG_PATH=augustus_config busco $args --augustus --mode $mode --cpu $task.cpus --in __UNCOMPRESSED_FASTA_FILE__ --out $meta.id cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index 0cd9e402..8f41f2f3 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -24,6 +24,9 @@ input: type: file description: Nucleic or amino acid sequence file in FASTA format pattern: "*.{fasta}" + - mode: + type: value + description: Sets the assessment MODE – genome, proteins, transcriptome - augustus_config: type: directory description: AUGUSTUS config directory diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index 28a4921c..d12abae9 100644 --- a/tests/modules/busco/main.nf +++ b/tests/modules/busco/main.nf @@ -3,24 +3,13 @@ nextflow.enable.dsl = 2 include { BUSCO as BUSCO_BACTE } from '../../../modules/busco/main.nf' -include { BUSCO as BUSCO_CHR22 } from '../../../modules/busco/main.nf' -include { UNTAR } from '../../../modules/untar/main.nf' // This tests genome decompression, empty input channels and data download -workflow test_busco_bacteroidales { +workflow test_busco { input = [ [ id:'test' ], file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) ] BUSCO_BACTE ( input, + "genome", [], [] ) } -// This tests uncompressed genome, BUSCO lineage file provided via input channel, and offline mode -workflow test_busco_chr22 { - input = [ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] - lineage_dataset = [ file(params.test_data['homo_sapiens']['genome']['chr22_odb10_tar_gz'], checkIfExists: true) ] - UNTAR(lineage_dataset) - BUSCO_CHR22 ( 
input, - [], - UNTAR.out.untar ) -} - diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index 32553d2e..d66bd305 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -1,5 +1,5 @@ -- name: busco test_busco_bacteroidales - command: nextflow run ./tests/modules/busco -entry test_busco_bacteroidales -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config +- name: busco test_busco + command: nextflow run ./tests/modules/busco -entry test_busco -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config tags: - busco files: @@ -8,13 +8,3 @@ - path: output/busco/test/run_bacteroidales_odb10/short_summary.txt contains: ['Complete BUSCOs (C)'] -- name: busco test_busco_chr22 - command: nextflow run ./tests/modules/busco -entry test_busco_chr22 -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config - tags: - - busco - files: - - path: output/busco/test/run_chr22_odb10/full_table.tsv - md5sum: 83f20e8996c591338ada73b6ab0eb269 - - path: output/busco/test/run_chr22_odb10/short_summary.txt - contains: ['Complete BUSCOs (C)'] - From 9a72548ee122607e46681f4b0b87296b48580cb3 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 3 May 2022 08:42:56 +0000 Subject: [PATCH 03/29] Update busco module --- modules/busco/main.nf | 62 +++++++++++++++++++---------- tests/modules/busco/main.nf | 20 +++++++--- tests/modules/busco/nextflow.config | 5 ++- tests/modules/busco/test.yml | 9 ++--- 4 files changed, 60 insertions(+), 36 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 6e526538..e66beeeb 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -1,31 +1,31 @@ process BUSCO { tag "$meta.id" label 'process_medium' - - conda (params.enable_conda ? "bioconda::busco=5.2.2" : null) + + conda (params.enable_conda ? 
"bioconda::busco=5.3.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/busco:5.2.2--pyhdfd78af_0': - 'quay.io/biocontainers/busco:5.2.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/busco:5.3.2--pyhdfd78af_0': + 'quay.io/biocontainers/busco:5.3.2--pyhdfd78af_0' }" input: - tuple val(meta), path(fasta) - val(mode) - path(augustus_config) - val(lineage) + tuple val(meta), path(fasta) // Required: meta map, and fasta sequence file + each lineage // Required: lineage to check against + path busco_lineages_path // Recommended: path to busco lineages - downloads if not set + path config_file // Optional: busco configuration file output: - tuple val(meta), path("${meta.id}/run_*/full_table.tsv"), emit: tsv - tuple val(meta), path("${meta.id}/run_*/short_summary.txt"), emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path("*-busco"), emit: busco_dir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if (lineage) args += " --lineage_dataset $lineage" + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def busco_config = config_file ? "--config $config_file" : '' + def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : '' """ - # Ensure the input is uncompressed - gzip -cdf $fasta > __UNCOMPRESSED_FASTA_FILE__ - # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Check for container variable initialisation script and source it. if [ -f "/usr/local/env-activate.sh" ]; then @@ -39,12 +39,30 @@ process BUSCO { . "/usr/local/etc/conda/activate.d/augustus.sh" . 
"/usr/local/etc/conda/activate.d/openjdk_activate.sh" fi - - # Copy the image's AUGUSTUS config directory if it was not provided to the module - [ ! -e augustus_config ] && cp -a /usr/local/config augustus_config - - # Busco command - AUGUSTUS_CONFIG_PATH=augustus_config busco $args --augustus --mode $mode --cpu $task.cpus --in __UNCOMPRESSED_FASTA_FILE__ --out $meta.id + + # If the augustus config directory is not writable, then copy to writeable area + if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then + # Create writable tmp directory for augustus + AUG_CONF_DIR=\$( mktemp -d -p \$PWD ) + cp -r \$AUGUSTUS_CONFIG_PATH/* \$AUG_CONF_DIR + export AUGUSTUS_CONFIG_PATH=\$AUG_CONF_DIR + echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}" + fi + + # Ensure the input is uncompressed + gzip -cdf $fasta > ${prefix}_uncompressed.fasta + + busco \\ + --cpu $task.cpus \\ + --in ${prefix}_uncompressed.fasta \\ + --out ${prefix}-busco \\ + --lineage_dataset $lineage \\ + $busco_lineage_dir \\ + $busco_config \\ + $args + + # clean up + rm ${prefix}_uncompressed.fasta cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index d12abae9..4985bd18 100644 --- a/tests/modules/busco/main.nf +++ b/tests/modules/busco/main.nf @@ -2,14 +2,22 @@ nextflow.enable.dsl = 2 -include { BUSCO as BUSCO_BACTE } from '../../../modules/busco/main.nf' +include { BUSCO } from '../../../modules/busco/main.nf' // This tests genome decompression, empty input channels and data download workflow test_busco { - input = [ [ id:'test' ], file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) ] - BUSCO_BACTE ( input, - "genome", - [], - [] ) + + input = [ + [ id:'test', single_end:false ], // meta map + file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + BUSCO ( + input, + 'bacteria_odb10', + [], // Download busco lineage + [], // No config + ) + } 
diff --git a/tests/modules/busco/nextflow.config b/tests/modules/busco/nextflow.config index 50f50a7a..feea8d40 100644 --- a/tests/modules/busco/nextflow.config +++ b/tests/modules/busco/nextflow.config @@ -1,5 +1,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file + ext.args = '--mode genome' + +} diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index d66bd305..3e7c1bf6 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -1,10 +1,7 @@ - name: busco test_busco - command: nextflow run ./tests/modules/busco -entry test_busco -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config + command: nextflow run tests/modules/busco -entry test_busco -c tests/config/nextflow.config tags: - busco files: - - path: output/busco/test/run_bacteroidales_odb10/full_table.tsv - md5sum: 8d7b401d875ecd9291b01bf4485bf080 - - path: output/busco/test/run_bacteroidales_odb10/short_summary.txt - contains: ['Complete BUSCOs (C)'] - + - path: output/busco/versions.yml + md5sum: 921e2abe85bf73e63a8b494453dc83cf From 9eff38ff94c2ecf7236b770b005518bacd272983 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 3 May 2022 09:15:15 +0000 Subject: [PATCH 04/29] Update meta.yml --- modules/busco/meta.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index 8f41f2f3..72ae4c29 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -22,14 +22,17 @@ input: e.g. [ id:'test', single_end:false ] - fasta: type: file - description: Nucleic or amino acid sequence file in FASTA format - pattern: "*.{fasta}" - - mode: + description: Nucleic or amino acid sequence file in FASTA format. 
+ pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" + - lineage: type: value - description: Sets the assessment MODE – genome, proteins, transcriptome - - augustus_config: + description: The BUSCO lineage to use. + - busco_lineages_path: type: directory - description: AUGUSTUS config directory + description: Path to local BUSCO lineages directory. + - config_file: + type: directory + description: Path to BUSCO config file. output: - meta: @@ -37,19 +40,16 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - busco_dir: + type: directory + description: BUSCO lineage specific output + pattern: "*-busco" - versions: type: file description: File containing software versions pattern: "versions.yml" - - tsv: - type: file - description: Full summary table - pattern: "*.{tsv}" - - txt: - type: file - description: Short summary text - pattern: "*.{txt}" authors: - "@priyanka-surana" - "@charles-plessy" + - "@mahesh-panchal" From 012f08537880805ad70a006f0076e6f8933b860f Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 3 May 2022 09:19:39 +0000 Subject: [PATCH 05/29] Prettier --- modules/busco/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index 72ae4c29..b73f498b 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -12,7 +12,7 @@ tools: documentation: https://busco.ezlab.org/busco_userguide.html tool_dev_url: https://gitlab.com/ezlab/busco doi: "10.1007/978-1-4939-9173-0_14" - licence: ['MIT'] + licence: ["MIT"] input: - meta: From 57db28386ebd236e4ffd07c47eba21f284cda55c Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 3 May 2022 13:26:49 +0000 Subject: [PATCH 06/29] Support batch fasta analysis --- modules/busco/main.nf | 24 +++++++++++++++++------- tests/modules/busco/main.nf | 6 ++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 
e66beeeb..4ba4baac 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -8,10 +8,10 @@ process BUSCO { 'quay.io/biocontainers/busco:5.3.2--pyhdfd78af_0' }" input: - tuple val(meta), path(fasta) // Required: meta map, and fasta sequence file - each lineage // Required: lineage to check against - path busco_lineages_path // Recommended: path to busco lineages - downloads if not set - path config_file // Optional: busco configuration file + tuple val(meta), path(fasta, stageAs: 'tmp_input/*') // Required: meta map, and fasta sequence files + each lineage // Required: lineage to check against + path busco_lineages_path // Recommended: path to busco lineages - downloads if not set + path config_file // Optional: busco configuration file output: tuple val(meta), path("*-busco"), emit: busco_dir @@ -50,11 +50,21 @@ process BUSCO { fi # Ensure the input is uncompressed - gzip -cdf $fasta > ${prefix}_uncompressed.fasta + INPUT_SEQS=input_seqs + mkdir "\$INPUT_SEQS" + cd "\$INPUT_SEQS" + for FASTA in ../tmp_input/*; do + if [ "\${FASTA##*.}" == 'gz' ]; then + gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .gz ) + else + ln -s "\$FASTA" . + fi + done + cd .. 
busco \\ --cpu $task.cpus \\ - --in ${prefix}_uncompressed.fasta \\ + --in "\$INPUT_SEQS" \\ --out ${prefix}-busco \\ --lineage_dataset $lineage \\ $busco_lineage_dir \\ @@ -62,7 +72,7 @@ process BUSCO { $args # clean up - rm ${prefix}_uncompressed.fasta + rm -rf "\$INPUT_SEQS" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index 4985bd18..bdd5f40d 100644 --- a/tests/modules/busco/main.nf +++ b/tests/modules/busco/main.nf @@ -9,7 +9,10 @@ workflow test_busco { input = [ [ id:'test', single_end:false ], // meta map - file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) + [ + file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true), + file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true) + ] ] BUSCO ( @@ -20,4 +23,3 @@ workflow test_busco { ) } - From 4be754b0080364ea14470ea152e3d0f3ca38baa8 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 3 May 2022 14:53:17 +0100 Subject: [PATCH 07/29] It's a file, not a directory --- modules/busco/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index b73f498b..e546c5ae 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -31,7 +31,7 @@ input: type: directory description: Path to local BUSCO lineages directory. - config_file: - type: directory + type: file description: Path to BUSCO config file. 
output: From 6351ec745ea35861554a0896470a43d47d1abee4 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 3 May 2022 14:56:27 +0100 Subject: [PATCH 08/29] Update contributor list --- modules/busco/meta.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index e546c5ae..1301e091 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -53,3 +53,4 @@ authors: - "@priyanka-surana" - "@charles-plessy" - "@mahesh-panchal" + - "@muffato" From 1c54c0b1ae684dba97345a136b802b5ef8e31e4d Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 3 May 2022 16:02:11 +0200 Subject: [PATCH 09/29] Apply suggestions from code review Co-authored-by: Matthieu Muffato --- modules/busco/main.nf | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 4ba4baac..c011b05b 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -24,20 +24,14 @@ process BUSCO { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def busco_config = config_file ? "--config $config_file" : '' - def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : '' + def busco_lineage_dir = busco_lineages_path ? "--offline --download_path ${busco_lineages_path}" : '' """ # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Check for container variable initialisation script and source it. if [ -f "/usr/local/env-activate.sh" ]; then - # . "/usr/local/env-activate.sh" # Errors out because of various unbound variables - export PATH='/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin' - export CONDA_PREFIX='/usr/local' - export CONDA_SHLVL='1' - export CONDA_DEFAULT_ENV='/usr/local' - export CONDA_PROMPT_MODIFIER='' - . "/usr/local/etc/conda/activate.d/activate-r-base.sh" - . 
"/usr/local/etc/conda/activate.d/augustus.sh" - . "/usr/local/etc/conda/activate.d/openjdk_activate.sh" + set +u # Otherwise, errors out because of various unbound variables + . "/usr/local/env-activate.sh" + set -u fi # If the augustus config directory is not writable, then copy to writeable area From f02cd618b89e57f7d7a92bcdd8c654c85897c14c Mon Sep 17 00:00:00 2001 From: "Jill V. Hagey, PhD" Date: Tue, 3 May 2022 10:09:19 -0400 Subject: [PATCH 10/29] Update meta.yml --- modules/busco/meta.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index 1301e091..007d5e7c 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -54,3 +54,4 @@ authors: - "@charles-plessy" - "@mahesh-panchal" - "@muffato" + - "@jvhagey" From 8df20218bf2f348a4f30b7a9b2f72bdb8e9e23a5 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Wed, 4 May 2022 09:22:43 +0000 Subject: [PATCH 11/29] Add more tests and capture summaries Co-authored-by: Jill V. 
Hagey, PhD --- modules/busco/main.nf | 11 +- tests/modules/busco/main.nf | 304 +++++++++++++++++++++++++++- tests/modules/busco/nextflow.config | 24 ++- tests/modules/busco/test.yml | 60 +++++- 4 files changed, 392 insertions(+), 7 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index c011b05b..59ac20f8 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -14,8 +14,11 @@ process BUSCO { path config_file // Optional: busco configuration file output: - tuple val(meta), path("*-busco"), emit: busco_dir - path "versions.yml" , emit: versions + tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json + tuple val(meta), path("*-busco") , emit: busco_dir + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -68,6 +71,10 @@ process BUSCO { # clean up rm -rf "\$INPUT_SEQS" + # Move files to avoid staging/publishing issues + mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt + mv ${prefix}-busco/*/short_summary.*.{json,txt} . 
+ cat <<-END_VERSIONS > versions.yml "${task.process}": busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index bdd5f40d..a678d371 100644 --- a/tests/modules/busco/main.nf +++ b/tests/modules/busco/main.nf @@ -5,7 +5,70 @@ nextflow.enable.dsl = 2 include { BUSCO } from '../../../modules/busco/main.nf' // This tests genome decompression, empty input channels and data download -workflow test_busco { +workflow test_busco_genome_single_fasta { + + input = [ + [ id:'test', single_end:false ], // meta map + file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + BUSCO ( + input, + ['bacteria_odb10', 'bacteroidetes_odb10'], + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmpimsfk4sj/busco/ + ├── test-bacteria_odb10-busco -> /tmp/tmp1sz7013h/b7/fdeaab567e1c5bccc475a4c19b8582/test-bacteria_odb10-busco/ + │ ├── batch_summary.txt + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ ├── run_bacteria_odb10/ + │ │ │ ├── busco_sequences/ + │ │ │ ├── full_table.tsv + │ │ │ ├── hmmer_output/ + │ │ │ ├── missing_busco_list.tsv + │ │ │ ├── short_summary.json + │ │ │ └── short_summary.txt + │ │ ├── short_summary.specific.bacteria_odb10.genome.fna.json + │ │ └── short_summary.specific.bacteria_odb10.genome.fna.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteroidetes_odb10-busco -> /tmp/tmp1sz7013h/75/0da56f59ee44bd2b85e0172906de49/test-bacteroidetes_odb10-busco/ + │ ├── batch_summary.txt + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ ├── run_bacteroidetes_odb10/ + │ │ │ ├── busco_sequences/ + │ │ │ ├── 
full_table.tsv + │ │ │ ├── hmmer_output/ + │ │ │ ├── missing_busco_list.tsv + │ │ │ ├── short_summary.json + │ │ │ └── short_summary.txt + │ │ ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json + │ │ └── short_summary.specific.bacteroidetes_odb10.genome.fna.txt + │ └── logs/ + │ └── busco.log + └── versions.yml -> /tmp/tmp1sz7013h/b7/fdeaab567e1c5bccc475a4c19b8582/versions.yml + */ + +} + +workflow test_busco_genome_multi_fasta { input = [ [ id:'test', single_end:false ], // meta map @@ -22,4 +85,243 @@ workflow test_busco { [], // No config ) + /* Output tree: + /tmp/tmpt22rjxzq/busco/ + ├── test-bacteria_odb10-busco -> /tmp/tmpfxt64xr_/36/425acbe5e9b27ba0bac8861f735494/test-bacteria_odb10-busco/ + │ ├── batch_summary.txt + │ ├── genome.fasta/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ ├── run_bacteria_odb10/ + │ │ │ ├── busco_sequences/ + │ │ │ ├── full_table.tsv + │ │ │ ├── hmmer_output/ + │ │ │ ├── missing_busco_list.tsv + │ │ │ ├── short_summary.json + │ │ │ └── short_summary.txt + │ │ ├── short_summary.specific.bacteria_odb10.genome.fasta.json + │ │ └── short_summary.specific.bacteria_odb10.genome.fasta.txt + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ ├── run_bacteria_odb10/ + │ │ │ ├── busco_sequences/ + │ │ │ ├── full_table.tsv + │ │ │ ├── hmmer_output/ + │ │ │ ├── missing_busco_list.tsv + │ │ │ ├── short_summary.json + │ │ │ └── short_summary.txt + │ │ ├── short_summary.specific.bacteria_odb10.genome.fna.json + │ │ └── short_summary.specific.bacteria_odb10.genome.fna.txt + │ └── logs/ + │ └── busco.log + └── versions.yml -> /tmp/tmpfxt64xr_/36/425acbe5e9b27ba0bac8861f735494/versions.yml + */ + +} + +workflow 
test_busco_eukaryote_metaeuk { + + input = [ + [ id:'test', single_end:false ], // meta map + file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + + BUSCO ( + input, + 'eukaryota_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmp22sf7kg9/busco/ + ├── test-eukaryota_odb10-busco -> /tmp/tmpmic8qsk6/d5/d8cb6681c0fcaa6da34b57ec174d59/test-eukaryota_odb10-busco/ + │ ├── batch_summary.txt + │ ├── genome.fasta/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── metaeuk_err.log + │ │ │ └── metaeuk_out.log + │ │ ├── run_eukaryota_odb10/ + │ │ │ ├── busco_sequences/ + │ │ │ ├── full_table.tsv + │ │ │ ├── hmmer_output/ + │ │ │ ├── metaeuk_output/ + │ │ │ ├── missing_busco_list.tsv + │ │ │ ├── short_summary.json + │ │ │ └── short_summary.txt + │ │ ├── short_summary.specific.eukaryota_odb10.genome.fasta.json + │ │ └── short_summary.specific.eukaryota_odb10.genome.fasta.txt + │ └── logs/ + │ └── busco.log + └── versions.yml -> /tmp/tmpmic8qsk6/d5/d8cb6681c0fcaa6da34b57ec174d59/versions.yml + */ + +} + +workflow test_busco_eukaryote_augustus { + + input = [ + [ id:'test', single_end:false ], // meta map + file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + + BUSCO ( + input, + 'eukaryota_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmpo77wyvb9/busco/ + ├── test-eukaryota_odb10-busco -> /tmp/tmpshljnwcg/25/9891a19cbabda15a5c10fb5e34987f/test-eukaryota_odb10-busco/ + │ ├── batch_summary.txt + │ ├── genome.fasta/ + │ │ ├── blast_db/ + │ │ │ ├── genome.fasta.ndb + │ │ │ ├── genome.fasta.nhr + │ │ │ ├── genome.fasta.nin + │ │ │ ├── genome.fasta.not + │ │ │ ├── genome.fasta.nsq + │ │ │ ├── genome.fasta.ntf + │ │ │ └── genome.fasta.nto + │ │ ├── logs/ + │ │ │ ├── makeblastdb_err.log + │ │ │ ├── makeblastdb_out.log + │ │ │ ├── tblastn_err.log + │ │ │ └── tblastn_out.log + │ │ └── 
run_eukaryota_odb10/ + │ │ ├── augustus_output/ + │ │ ├── blast_output/ + │ │ ├── busco_sequences/ + │ │ └── hmmer_output/ + │ └── logs/ + │ └── busco.log + └── versions.yml -> /tmp/tmpshljnwcg/25/9891a19cbabda15a5c10fb5e34987f/versions.yml + */ + +} + +workflow test_busco_protein { + + input = [ + [ id:'test', single_end:false ], // meta map + file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true) + ] + + BUSCO ( + input, + 'bacteria_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmplju98s42/busco/ + ├── test-bacteria_odb10-busco -> /tmp/tmp0oru9_61/9c/e992f5eee84806770002e4510f51cb/test-bacteria_odb10-busco/ + │ ├── batch_summary.txt + │ ├── logs/ + │ │ └── busco.log + │ └── proteome.fasta/ + │ ├── logs/ + │ │ ├── hmmsearch_err.log + │ │ └── hmmsearch_out.log + │ ├── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ ├── short_summary.specific.bacteria_odb10.proteome.fasta.json + │ └── short_summary.specific.bacteria_odb10.proteome.fasta.txt + └── versions.yml -> /tmp/tmp0oru9_61/9c/e992f5eee84806770002e4510f51cb/versions.yml + */ +} +workflow test_busco_transcriptome { + + input = [ + [ id:'test', single_end:false ], // meta map + file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true) + ] + + BUSCO ( + input, + 'bacteria_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmp5twpr8o9/busco/ + ├── test-bacteria_odb10-busco -> /tmp/tmp_qyjiads/0d/886515d0f06686b2227517398ef8ce/test-bacteria_odb10-busco/ + │ ├── batch_summary.txt + │ ├── logs/ + │ │ └── busco.log + │ └── test1.contigs.fa/ + │ ├── blast_db/ + │ │ ├── test1.contigs.fa.ndb + │ │ ├── test1.contigs.fa.nhr + │ │ ├── test1.contigs.fa.nin + │ │ ├── test1.contigs.fa.not + │ │ ├── 
test1.contigs.fa.nsq + │ │ ├── test1.contigs.fa.ntf + │ │ └── test1.contigs.fa.nto + │ ├── logs/ + │ │ ├── hmmsearch_err.log + │ │ ├── hmmsearch_out.log + │ │ ├── makeblastdb_err.log + │ │ ├── makeblastdb_out.log + │ │ ├── tblastn_err.log + │ │ └── tblastn_out.log + │ ├── run_bacteria_odb10/ + │ │ ├── blast_output/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ ├── short_summary.txt + │ │ └── single_copy_proteins.faa + │ ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.json + │ ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + │ └── translated_proteins/ + │ ├── 1024388at2.faa + │ ├── 1054741at2.faa + │ ├── 1093223at2.faa + │ ├── 1151822at2.faa + │ ├── 143460at2.faa + │ ├── 1491686at2.faa + │ ├── 1504821at2.faa + │ ├── 1574817at2.faa + │ ├── 1592033at2.faa + │ ├── 1623045at2.faa + │ ├── 1661836at2.faa + │ ├── 1674344at2.faa + │ ├── 1698718at2.faa + │ ├── 1990650at2.faa + │ ├── 223233at2.faa + │ ├── 402899at2.faa + │ ├── 505485at2.faa + │ ├── 665824at2.faa + │ ├── 776861at2.faa + │ ├── 874197at2.faa + │ ├── 932854at2.faa + │ └── 95696at2.faa + └── versions.yml -> /tmp/tmp_qyjiads/0d/886515d0f06686b2227517398ef8ce/versions.yml + */ + } diff --git a/tests/modules/busco/nextflow.config b/tests/modules/busco/nextflow.config index feea8d40..9e8f718f 100644 --- a/tests/modules/busco/nextflow.config +++ b/tests/modules/busco/nextflow.config @@ -1,6 +1,28 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - ext.args = '--mode genome' + withName: 'test_busco_genome_single_fasta:BUSCO' { + ext.args = '--mode genome' + } + + withName: 'test_busco_genome_multi_fasta:BUSCO' { + ext.args = '--mode genome' + } + + withName: 'test_busco_eukaryote_metaeuk:BUSCO' { + ext.args = '--mode genome' + } + + withName: 'test_busco_eukaryote_augustus:BUSCO' { + ext.args = '--mode genome --augustus' + } + + 
withName: 'test_busco_protein:BUSCO' { + ext.args = '--mode proteins' + } + + withName: 'test_busco_transcriptome:BUSCO'{ + ext.args = '--mode transcriptome' + } } diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index 3e7c1bf6..2125c3a4 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -1,7 +1,61 @@ -- name: busco test_busco - command: nextflow run tests/modules/busco -entry test_busco -c tests/config/nextflow.config +- name: busco test_busco_genome_single_fasta + command: nextflow run tests/modules/busco -entry test_busco_genome_single_fasta -c tests/config/nextflow.config tags: - busco + - bacteria + - genome files: - path: output/busco/versions.yml - md5sum: 921e2abe85bf73e63a8b494453dc83cf + md5sum: 8aa830f71587d859df35c6cfab59f35d + +- name: busco test_busco_genome_multi_fasta + command: nextflow run tests/modules/busco -entry test_busco_genome_multi_fasta -c tests/config/nextflow.config + tags: + - busco + - bacteria + - genome + files: + - path: output/busco/versions.yml + md5sum: 9a959eb0a1f765777dff1ea2f5c139c0 + +- name: busco test_busco_eukaryote_metaeuk + command: nextflow run tests/modules/busco -entry test_busco_eukaryote_metaeuk -c tests/config/nextflow.config + tags: + - busco + - eukaryote + - genome + - metaeuk + files: + - path: output/busco/versions.yml + md5sum: 34a808c257e6db1b0456f3b4372bc477 + +- name: busco test_busco_eukaryote_augustus + command: nextflow run tests/modules/busco -entry test_busco_eukaryote_augustus -c tests/config/nextflow.config + tags: + - busco + - eukaryote + - genome + - augustus + files: + - path: output/busco/versions.yml + md5sum: 2caac915461410b16a1524ac064cd0df + +- name: busco test_busco_protein + command: nextflow run tests/modules/busco -entry test_busco_protein -c tests/config/nextflow.config + tags: + - busco + - bacteria + - proteins + files: + - path: output/busco/versions.yml + md5sum: d7392261a57960a7e6aea609dce824f5 + +- name: busco 
test_busco_transcriptome + command: nextflow run tests/modules/busco -entry test_busco_transcriptome -c tests/config/nextflow.config + tags: + - busco + - bacteria + - transcriptome + files: + - path: output/busco/versions.yml + md5sum: 30eacbc7df70f6b1e72e0a7b6d02a7e1 From 73a8df9522e31eaa34f9f611d8a5a42d6a8c53fe Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Wed, 4 May 2022 11:52:42 +0000 Subject: [PATCH 12/29] Fix no genes found test and update test.yml --- modules/busco/main.nf | 6 +- tests/modules/busco/main.nf | 166 +++++++++++++++++------------------ tests/modules/busco/test.yml | 126 +++++++++++++++++++++++--- 3 files changed, 198 insertions(+), 100 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 59ac20f8..f6bd63e2 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -15,8 +15,8 @@ process BUSCO { output: tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true tuple val(meta), path("*-busco") , emit: busco_dir path "versions.yml" , emit: versions @@ -73,7 +73,7 @@ process BUSCO { # Move files to avoid staging/publishing issues mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt - mv ${prefix}-busco/*/short_summary.*.{json,txt} . + mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found." 
cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index a678d371..f0201d21 100644 --- a/tests/modules/busco/main.nf +++ b/tests/modules/busco/main.nf @@ -20,9 +20,12 @@ workflow test_busco_genome_single_fasta { ) /* Output tree: - /tmp/tmpimsfk4sj/busco/ - ├── test-bacteria_odb10-busco -> /tmp/tmp1sz7013h/b7/fdeaab567e1c5bccc475a4c19b8582/test-bacteria_odb10-busco/ - │ ├── batch_summary.txt + /tmp/tmpisa3ktco/busco/ + ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/short_summary.specific.bacteria_odb10.genome.fna.json + ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/short_summary.specific.bacteria_odb10.genome.fna.txt + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/short_summary.specific.bacteroidetes_odb10.genome.fna.json + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/short_summary.specific.bacteroidetes_odb10.genome.fna.txt + ├── test-bacteria_odb10-busco -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/test-bacteria_odb10-busco/ │ ├── genome.fna/ │ │ ├── logs/ │ │ │ ├── hmmsearch_err.log @@ -31,19 +34,17 @@ workflow test_busco_genome_single_fasta { │ │ │ └── prodigal_out.log │ │ ├── prodigal_output/ │ │ │ └── predicted_genes/ - │ │ ├── run_bacteria_odb10/ - │ │ │ ├── busco_sequences/ - │ │ │ ├── full_table.tsv - │ │ │ ├── hmmer_output/ - │ │ │ ├── missing_busco_list.tsv - │ │ │ ├── short_summary.json - │ │ │ └── short_summary.txt - │ │ ├── short_summary.specific.bacteria_odb10.genome.fna.json - │ │ └── short_summary.specific.bacteria_odb10.genome.fna.txt + │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json 
+ │ │ └── short_summary.txt │ └── logs/ │ └── busco.log - ├── test-bacteroidetes_odb10-busco -> /tmp/tmp1sz7013h/75/0da56f59ee44bd2b85e0172906de49/test-bacteroidetes_odb10-busco/ - │ ├── batch_summary.txt + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/test-bacteria_odb10-busco.batch_summary.txt + ├── test-bacteroidetes_odb10-busco -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/test-bacteroidetes_odb10-busco/ │ ├── genome.fna/ │ │ ├── logs/ │ │ │ ├── hmmsearch_err.log @@ -52,18 +53,17 @@ workflow test_busco_genome_single_fasta { │ │ │ └── prodigal_out.log │ │ ├── prodigal_output/ │ │ │ └── predicted_genes/ - │ │ ├── run_bacteroidetes_odb10/ - │ │ │ ├── busco_sequences/ - │ │ │ ├── full_table.tsv - │ │ │ ├── hmmer_output/ - │ │ │ ├── missing_busco_list.tsv - │ │ │ ├── short_summary.json - │ │ │ └── short_summary.txt - │ │ ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json - │ │ └── short_summary.specific.bacteroidetes_odb10.genome.fna.txt + │ │ └── run_bacteroidetes_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt │ └── logs/ │ └── busco.log - └── versions.yml -> /tmp/tmp1sz7013h/b7/fdeaab567e1c5bccc475a4c19b8582/versions.yml + ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/test-bacteroidetes_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/versions.yml */ } @@ -86,9 +86,12 @@ workflow test_busco_genome_multi_fasta { ) /* Output tree: - /tmp/tmpt22rjxzq/busco/ - ├── test-bacteria_odb10-busco -> /tmp/tmpfxt64xr_/36/425acbe5e9b27ba0bac8861f735494/test-bacteria_odb10-busco/ - │ ├── batch_summary.txt + /tmp/tmpk19byek7/busco/ + ├── short_summary.specific.bacteria_odb10.genome.fasta.json -> 
/tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.json + ├── short_summary.specific.bacteria_odb10.genome.fasta.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.txt + ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.json + ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.txt + ├── test-bacteria_odb10-busco -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco/ │ ├── genome.fasta/ │ │ ├── logs/ │ │ │ ├── hmmsearch_err.log @@ -97,15 +100,13 @@ workflow test_busco_genome_multi_fasta { │ │ │ └── prodigal_out.log │ │ ├── prodigal_output/ │ │ │ └── predicted_genes/ - │ │ ├── run_bacteria_odb10/ - │ │ │ ├── busco_sequences/ - │ │ │ ├── full_table.tsv - │ │ │ ├── hmmer_output/ - │ │ │ ├── missing_busco_list.tsv - │ │ │ ├── short_summary.json - │ │ │ └── short_summary.txt - │ │ ├── short_summary.specific.bacteria_odb10.genome.fasta.json - │ │ └── short_summary.specific.bacteria_odb10.genome.fasta.txt + │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt │ ├── genome.fna/ │ │ ├── logs/ │ │ │ ├── hmmsearch_err.log @@ -114,18 +115,17 @@ workflow test_busco_genome_multi_fasta { │ │ │ └── prodigal_out.log │ │ ├── prodigal_output/ │ │ │ └── predicted_genes/ - │ │ ├── run_bacteria_odb10/ - │ │ │ ├── busco_sequences/ - │ │ │ ├── full_table.tsv - │ │ │ ├── hmmer_output/ - │ │ │ ├── missing_busco_list.tsv - │ │ │ ├── short_summary.json - │ │ │ └── short_summary.txt - │ │ ├── short_summary.specific.bacteria_odb10.genome.fna.json - │ │ └── short_summary.specific.bacteria_odb10.genome.fna.txt 
+ │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt │ └── logs/ │ └── busco.log - └── versions.yml -> /tmp/tmpfxt64xr_/36/425acbe5e9b27ba0bac8861f735494/versions.yml + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/versions.yml */ } @@ -145,28 +145,28 @@ workflow test_busco_eukaryote_metaeuk { ) /* Output tree: - /tmp/tmp22sf7kg9/busco/ - ├── test-eukaryota_odb10-busco -> /tmp/tmpmic8qsk6/d5/d8cb6681c0fcaa6da34b57ec174d59/test-eukaryota_odb10-busco/ - │ ├── batch_summary.txt + /tmp/tmpeq4dsir5/busco/ + ├── short_summary.specific.eukaryota_odb10.genome.fasta.json -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.json + ├── short_summary.specific.eukaryota_odb10.genome.fasta.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.txt + ├── test-eukaryota_odb10-busco -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco/ │ ├── genome.fasta/ │ │ ├── logs/ │ │ │ ├── hmmsearch_err.log │ │ │ ├── hmmsearch_out.log │ │ │ ├── metaeuk_err.log │ │ │ └── metaeuk_out.log - │ │ ├── run_eukaryota_odb10/ - │ │ │ ├── busco_sequences/ - │ │ │ ├── full_table.tsv - │ │ │ ├── hmmer_output/ - │ │ │ ├── metaeuk_output/ - │ │ │ ├── missing_busco_list.tsv - │ │ │ ├── short_summary.json - │ │ │ └── short_summary.txt - │ │ ├── short_summary.specific.eukaryota_odb10.genome.fasta.json - │ │ └── short_summary.specific.eukaryota_odb10.genome.fasta.txt + │ │ └── run_eukaryota_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── metaeuk_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── 
short_summary.txt │ └── logs/ │ └── busco.log - └── versions.yml -> /tmp/tmpmic8qsk6/d5/d8cb6681c0fcaa6da34b57ec174d59/versions.yml + ├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/versions.yml */ } @@ -186,9 +186,8 @@ workflow test_busco_eukaryote_augustus { ) /* Output tree: - /tmp/tmpo77wyvb9/busco/ - ├── test-eukaryota_odb10-busco -> /tmp/tmpshljnwcg/25/9891a19cbabda15a5c10fb5e34987f/test-eukaryota_odb10-busco/ - │ ├── batch_summary.txt + /tmp/tmp2xqaygjj/busco/ + ├── test-eukaryota_odb10-busco -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco/ │ ├── genome.fasta/ │ │ ├── blast_db/ │ │ │ ├── genome.fasta.ndb @@ -210,7 +209,8 @@ workflow test_busco_eukaryote_augustus { │ │ └── hmmer_output/ │ └── logs/ │ └── busco.log - └── versions.yml -> /tmp/tmpshljnwcg/25/9891a19cbabda15a5c10fb5e34987f/versions.yml + ├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/versions.yml */ } @@ -230,25 +230,25 @@ workflow test_busco_protein { ) /* Output tree: - /tmp/tmplju98s42/busco/ - ├── test-bacteria_odb10-busco -> /tmp/tmp0oru9_61/9c/e992f5eee84806770002e4510f51cb/test-bacteria_odb10-busco/ - │ ├── batch_summary.txt + /tmp/tmpzwd5dn56/busco/ + ├── short_summary.specific.bacteria_odb10.proteome.fasta.json -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.json + ├── short_summary.specific.bacteria_odb10.proteome.fasta.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.txt + ├── test-bacteria_odb10-busco -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco/ │ ├── logs/ 
│ │ └── busco.log │ └── proteome.fasta/ │ ├── logs/ │ │ ├── hmmsearch_err.log │ │ └── hmmsearch_out.log - │ ├── run_bacteria_odb10/ - │ │ ├── busco_sequences/ - │ │ ├── full_table.tsv - │ │ ├── hmmer_output/ - │ │ ├── missing_busco_list.tsv - │ │ ├── short_summary.json - │ │ └── short_summary.txt - │ ├── short_summary.specific.bacteria_odb10.proteome.fasta.json - │ └── short_summary.specific.bacteria_odb10.proteome.fasta.txt - └── versions.yml -> /tmp/tmp0oru9_61/9c/e992f5eee84806770002e4510f51cb/versions.yml + │ └── run_bacteria_odb10/ + │ ├── busco_sequences/ + │ ├── full_table.tsv + │ ├── hmmer_output/ + │ ├── missing_busco_list.tsv + │ ├── short_summary.json + │ └── short_summary.txt + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/versions.yml */ } workflow test_busco_transcriptome { @@ -266,9 +266,10 @@ workflow test_busco_transcriptome { ) /* Output tree: - /tmp/tmp5twpr8o9/busco/ - ├── test-bacteria_odb10-busco -> /tmp/tmp_qyjiads/0d/886515d0f06686b2227517398ef8ce/test-bacteria_odb10-busco/ - │ ├── batch_summary.txt + /tmp/tmpitjyvo9g/busco/ + ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.json -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + ├── test-bacteria_odb10-busco -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco/ │ ├── logs/ │ │ └── busco.log │ └── test1.contigs.fa/ @@ -296,8 +297,6 @@ workflow test_busco_transcriptome { │ │ ├── short_summary.json │ │ ├── short_summary.txt │ │ └── single_copy_proteins.faa - │ ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.json - │ ├── 
short_summary.specific.bacteria_odb10.test1.contigs.fa.txt │ └── translated_proteins/ │ ├── 1024388at2.faa │ ├── 1054741at2.faa @@ -321,7 +320,8 @@ workflow test_busco_transcriptome { │ ├── 874197at2.faa │ ├── 932854at2.faa │ └── 95696at2.faa - └── versions.yml -> /tmp/tmp_qyjiads/0d/886515d0f06686b2227517398ef8ce/versions.yml + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/versions.yml */ } diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index 2125c3a4..b838d9b7 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -2,9 +2,39 @@ command: nextflow run tests/modules/busco -entry test_busco_genome_single_fasta -c tests/config/nextflow.config tags: - busco - - bacteria - - genome files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - 'one_line_summary' + - 'input_file' + - 'mode' + - 'dataset' + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - 'BUSCO version' + - 'The lineage dataset is' + - 'BUSCO was run in mode' + - 'Complete BUSCOs' + - 'Missing BUSCOs' + - 'Dependencies and versions' + - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json + contains: + - 'one_line_summary' + - 'input_file' + - 'mode' + - 'dataset' + - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.txt + contains: + - 'BUSCO version' + - 'The lineage dataset is' + - 'BUSCO was run in mode' + - 'Complete BUSCOs' + - 'Missing BUSCOs' + - 'Dependencies and versions' + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: e50690742e9ae6abdd2bf99334ff9e12 + - path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt + md5sum: 4c1b2c4317c88398eddc30877ed740d9 - path: output/busco/versions.yml md5sum: 
8aa830f71587d859df35c6cfab59f35d @@ -12,9 +42,37 @@ command: nextflow run tests/modules/busco -entry test_busco_genome_multi_fasta -c tests/config/nextflow.config tags: - busco - - bacteria - - genome files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json + contains: + - 'one_line_summary' + - 'input_file' + - 'mode' + - 'dataset' + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt + contains: + - 'BUSCO version' + - 'The lineage dataset is' + - 'BUSCO was run in mode' + - 'Complete BUSCOs' + - 'Missing BUSCOs' + - 'Dependencies and versions' + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - 'one_line_summary' + - 'input_file' + - 'mode' + - 'dataset' + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - 'BUSCO version' + - 'The lineage dataset is' + - 'BUSCO was run in mode' + - 'Complete BUSCOs' + - 'Missing BUSCOs' + - 'Dependencies and versions' + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 5360dfe83bec1f5741ee115e53e6b517 - path: output/busco/versions.yml md5sum: 9a959eb0a1f765777dff1ea2f5c139c0 @@ -22,10 +80,23 @@ command: nextflow run tests/modules/busco -entry test_busco_eukaryote_metaeuk -c tests/config/nextflow.config tags: - busco - - eukaryote - - genome - - metaeuk files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - 'one_line_summary' + - 'input_file' + - 'mode' + - 'dataset' + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - 'BUSCO version' + - 'The lineage dataset is' + - 'BUSCO was run in mode' + - 'Complete BUSCOs' + - 'Missing BUSCOs' + - 'Dependencies and versions' + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: a70806f99ba5706d7353d3353b3f1d2b - path: output/busco/versions.yml md5sum: 34a808c257e6db1b0456f3b4372bc477 @@ -33,10 +104,9 @@ command: nextflow 
run tests/modules/busco -entry test_busco_eukaryote_augustus -c tests/config/nextflow.config tags: - busco - - eukaryote - - genome - - augustus files: + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: 660393dd43cd6a093b952d4b8ad41e40 - path: output/busco/versions.yml md5sum: 2caac915461410b16a1524ac064cd0df @@ -44,9 +114,23 @@ command: nextflow run tests/modules/busco -entry test_busco_protein -c tests/config/nextflow.config tags: - busco - - bacteria - - proteins files: + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json + contains: + - 'one_line_summary' + - 'input_file' + - 'mode' + - 'dataset' + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt + contains: + - 'BUSCO version' + - 'The lineage dataset is' + - 'BUSCO was run in mode' + - 'Complete BUSCOs' + - 'Missing BUSCOs' + - 'Dependencies and versions' + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: fd3b4e30ce74d1fcb95d6286d6e2049f - path: output/busco/versions.yml md5sum: d7392261a57960a7e6aea609dce824f5 @@ -54,8 +138,22 @@ command: nextflow run tests/modules/busco -entry test_busco_transcriptome -c tests/config/nextflow.config tags: - busco - - bacteria - - transcriptome files: + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + contains: + - 'one_line_summary' + - 'input_file' + - 'mode' + - 'dataset' + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + contains: + - 'BUSCO version' + - 'The lineage dataset is' + - 'BUSCO was run in mode' + - 'Complete BUSCOs' + - 'Missing BUSCOs' + - 'Dependencies and versions' + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 9a176cafe66ac0adca89dc34ad2be13f - path: output/busco/versions.yml md5sum: 30eacbc7df70f6b1e72e0a7b6d02a7e1 From f65abe1e9a4e9d9932772a4d2355666b9cac9e61 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Wed, 4 May 2022 11:54:51 +0000 
Subject: [PATCH 13/29] Prettier --- tests/modules/busco/test.yml | 140 +++++++++++++++++------------------ 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index b838d9b7..43b810ba 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -5,32 +5,32 @@ files: - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json contains: - - 'one_line_summary' - - 'input_file' - - 'mode' - - 'dataset' + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt contains: - - 'BUSCO version' - - 'The lineage dataset is' - - 'BUSCO was run in mode' - - 'Complete BUSCOs' - - 'Missing BUSCOs' - - 'Dependencies and versions' + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json contains: - - 'one_line_summary' - - 'input_file' - - 'mode' - - 'dataset' + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.txt contains: - - 'BUSCO version' - - 'The lineage dataset is' - - 'BUSCO was run in mode' - - 'Complete BUSCOs' - - 'Missing BUSCOs' - - 'Dependencies and versions' + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt md5sum: e50690742e9ae6abdd2bf99334ff9e12 - path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt @@ -45,32 +45,32 @@ files: - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json contains: - - 'one_line_summary' - - 'input_file' - - 'mode' - - 'dataset' + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" - 
path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt contains: - - 'BUSCO version' - - 'The lineage dataset is' - - 'BUSCO was run in mode' - - 'Complete BUSCOs' - - 'Missing BUSCOs' - - 'Dependencies and versions' + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json contains: - - 'one_line_summary' - - 'input_file' - - 'mode' - - 'dataset' + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt contains: - - 'BUSCO version' - - 'The lineage dataset is' - - 'BUSCO was run in mode' - - 'Complete BUSCOs' - - 'Missing BUSCOs' - - 'Dependencies and versions' + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt md5sum: 5360dfe83bec1f5741ee115e53e6b517 - path: output/busco/versions.yml @@ -83,18 +83,18 @@ files: - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json contains: - - 'one_line_summary' - - 'input_file' - - 'mode' - - 'dataset' + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt contains: - - 'BUSCO version' - - 'The lineage dataset is' - - 'BUSCO was run in mode' - - 'Complete BUSCOs' - - 'Missing BUSCOs' - - 'Dependencies and versions' + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt md5sum: a70806f99ba5706d7353d3353b3f1d2b - path: output/busco/versions.yml @@ -117,18 +117,18 @@ files: - path: 
output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json contains: - - 'one_line_summary' - - 'input_file' - - 'mode' - - 'dataset' + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt contains: - - 'BUSCO version' - - 'The lineage dataset is' - - 'BUSCO was run in mode' - - 'Complete BUSCOs' - - 'Missing BUSCOs' - - 'Dependencies and versions' + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt md5sum: fd3b4e30ce74d1fcb95d6286d6e2049f - path: output/busco/versions.yml @@ -141,18 +141,18 @@ files: - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json contains: - - 'one_line_summary' - - 'input_file' - - 'mode' - - 'dataset' + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt contains: - - 'BUSCO version' - - 'The lineage dataset is' - - 'BUSCO was run in mode' - - 'Complete BUSCOs' - - 'Missing BUSCOs' - - 'Dependencies and versions' + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt md5sum: 9a176cafe66ac0adca89dc34ad2be13f - path: output/busco/versions.yml From 865ad3447a2b74167a25f36e0471f7dbe42fce97 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Thu, 5 May 2022 08:23:54 +0000 Subject: [PATCH 14/29] Update meta.yml output files --- modules/busco/meta.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index 007d5e7c..19c2c991 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -40,6 +40,18 @@ output: description: | Groovy Map 
containing sample information e.g. [ id:'test', single_end:false ] + - batch_summary: + type: file + description: Summary of all sequence files analyzed + pattern: "*-busco.batch_summary.txt" + - short_summaries_txt: + type: file + description: Short Busco summary in plain text format + pattern: "short_summary.*.txt" + - short_summaries_json: + type: file + description: Short Busco summary in JSON format + pattern: "short_summary.*.json" - busco_dir: type: directory description: BUSCO lineage specific output From 2542ae1823d57777434a63849e217eabf97b3a8e Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Thu, 5 May 2022 16:33:32 +0200 Subject: [PATCH 15/29] Update modules/busco/main.nf Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> --- modules/busco/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index f6bd63e2..15e7ce5c 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -8,10 +8,10 @@ process BUSCO { 'quay.io/biocontainers/busco:5.3.2--pyhdfd78af_0' }" input: - tuple val(meta), path(fasta, stageAs: 'tmp_input/*') // Required: meta map, and fasta sequence files - each lineage // Required: lineage to check against - path busco_lineages_path // Recommended: path to busco lineages - downloads if not set - path config_file // Optional: busco configuration file + tuple val(meta), path('tmp_input/*') // Required: meta map, and fasta sequence files + each lineage // Required: lineage to check against + path busco_lineages_path // Recommended: path to busco lineages - downloads if not set + path config_file // Optional: busco configuration file output: tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary From 1289626cc99eecb3436bd067cd90a85028957d5f Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 5 May 2022 16:09:45 +0100 Subject: [PATCH 16/29] Update modules/busco/main.nf --- modules/busco/main.nf | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 15e7ce5c..8c1b1dad 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -8,7 +8,7 @@ process BUSCO { 'quay.io/biocontainers/busco:5.3.2--pyhdfd78af_0' }" input: - tuple val(meta), path('tmp_input/*') // Required: meta map, and fasta sequence files + tuple val(meta), path('tmp_input/*') each lineage // Required: lineage to check against path busco_lineages_path // Recommended: path to busco lineages - downloads if not set path config_file // Optional: busco configuration file From 35c5828f43b50bc029b20b133d3878b3bc16ca9d Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Fri, 6 May 2022 11:48:21 +0000 Subject: [PATCH 17/29] Add --auto-lineage option and remove single_end Co-authored-by: Jill V. Hagey, PhD --- modules/busco/main.nf | 5 +-- tests/modules/busco/main.nf | 67 ++++++++++++++++++++++++++---------- tests/modules/busco/test.yml | 30 ++++++++++++++++ 3 files changed, 82 insertions(+), 20 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 8c1b1dad..f0713862 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -9,7 +9,7 @@ process BUSCO { input: tuple val(meta), path('tmp_input/*') - each lineage // Required: lineage to check against + each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead path busco_lineages_path // Recommended: path to busco lineages - downloads if not set path config_file // Optional: busco configuration file @@ -27,6 +27,7 @@ process BUSCO { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def busco_config = config_file ? "--config $config_file" : '' + def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}" def busco_lineage_dir = busco_lineages_path ? 
"--offline --download_path ${busco_lineages_path}" : '' """ # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) @@ -63,7 +64,7 @@ process BUSCO { --cpu $task.cpus \\ --in "\$INPUT_SEQS" \\ --out ${prefix}-busco \\ - --lineage_dataset $lineage \\ + $busco_lineage \\ $busco_lineage_dir \\ $busco_config \\ $args diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index f0201d21..e290b965 100644 --- a/tests/modules/busco/main.nf +++ b/tests/modules/busco/main.nf @@ -4,28 +4,59 @@ nextflow.enable.dsl = 2 include { BUSCO } from '../../../modules/busco/main.nf' -// This tests genome decompression, empty input channels and data download workflow test_busco_genome_single_fasta { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'test' ], // meta map file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) ] BUSCO ( input, - ['bacteria_odb10', 'bacteroidetes_odb10'], + ['auto','bacteria_odb10', 'bacteroidetes_odb10'], // Launch with auto to use --auto-lineage, and specified lineages [], // Download busco lineage [], // No config ) /* Output tree: - /tmp/tmpisa3ktco/busco/ - ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/short_summary.specific.bacteria_odb10.genome.fna.json - ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/short_summary.specific.bacteria_odb10.genome.fna.txt - ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/short_summary.specific.bacteroidetes_odb10.genome.fna.json - ├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/short_summary.specific.bacteroidetes_odb10.genome.fna.txt - ├── test-bacteria_odb10-busco -> 
/tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/test-bacteria_odb10-busco/ + /tmp/tmp846crjv2/busco/ + ├── short_summary.generic.bacteria_odb10.genome.fna.json -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.json + ├── short_summary.generic.bacteria_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.txt + ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/short_summary.specific.bacteria_odb10.genome.fna.json + ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/short_summary.specific.bacteria_odb10.genome.fna.txt + ├── short_summary.specific.bacteroidales_odb10.genome.fna.json -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.specific.bacteroidales_odb10.genome.fna.json + ├── short_summary.specific.bacteroidales_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.specific.bacteroidales_odb10.genome.fna.txt + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/short_summary.specific.bacteroidetes_odb10.genome.fna.json + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/short_summary.specific.bacteroidetes_odb10.genome.fna.txt + ├── test-auto-busco -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco/ + │ ├── genome.fna/ + │ │ ├── auto_lineage/ + │ │ │ ├── run_archaea_odb10/ + │ │ │ ├── run_bacteria_odb10/ + │ │ │ └── run_eukaryota_odb10/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── metaeuk_err.log + │ │ │ ├── metaeuk_out.log + │ │ │ ├── prodigal_err.log + │ │ │ ├── prodigal_out.log + │ │ │ ├── sepp_err.log + │ │ │ └── sepp_out.log + │ │ ├── prodigal_output/ + │ │ │ └── 
predicted_genes/ + │ │ ├── run_bacteria_odb10 -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco/genome.fna/auto_lineage/run_bacteria_odb10/ [recursive, not followed] + │ │ └── run_bacteroidales_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-auto-busco.batch_summary.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco.batch_summary.txt + ├── test-bacteria_odb10-busco -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/test-bacteria_odb10-busco/ │ ├── genome.fna/ │ │ ├── logs/ │ │ │ ├── hmmsearch_err.log @@ -43,8 +74,8 @@ workflow test_busco_genome_single_fasta { │ │ └── short_summary.txt │ └── logs/ │ └── busco.log - ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/test-bacteria_odb10-busco.batch_summary.txt - ├── test-bacteroidetes_odb10-busco -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/test-bacteroidetes_odb10-busco/ + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/test-bacteria_odb10-busco.batch_summary.txt + ├── test-bacteroidetes_odb10-busco -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/test-bacteroidetes_odb10-busco/ │ ├── genome.fna/ │ │ ├── logs/ │ │ │ ├── hmmsearch_err.log @@ -62,8 +93,8 @@ workflow test_busco_genome_single_fasta { │ │ └── short_summary.txt │ └── logs/ │ └── busco.log - ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/test-bacteroidetes_odb10-busco.batch_summary.txt - └── versions.yml -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/versions.yml + ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/test-bacteroidetes_odb10-busco.batch_summary.txt + └── versions.yml -> 
/tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/versions.yml */ } @@ -71,7 +102,7 @@ workflow test_busco_genome_single_fasta { workflow test_busco_genome_multi_fasta { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'test' ], // meta map [ file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true), file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true) @@ -133,7 +164,7 @@ workflow test_busco_genome_multi_fasta { workflow test_busco_eukaryote_metaeuk { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'test' ], // meta map file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] @@ -174,7 +205,7 @@ workflow test_busco_eukaryote_metaeuk { workflow test_busco_eukaryote_augustus { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'test' ], // meta map file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] @@ -218,7 +249,7 @@ workflow test_busco_eukaryote_augustus { workflow test_busco_protein { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'test' ], // meta map file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true) ] @@ -254,7 +285,7 @@ workflow test_busco_protein { workflow test_busco_transcriptome { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'test' ], // meta map file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true) ] diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index 43b810ba..2a801e64 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -3,6 +3,20 @@ tags: - busco files: + - path: output/busco/short_summary.generic.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: 
output/busco/short_summary.generic.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json contains: - "one_line_summary" @@ -17,6 +31,20 @@ - "Complete BUSCOs" - "Missing BUSCOs" - "Dependencies and versions" + - path: output/busco/short_summary.specific.bacteroidales_odb10.genome.fna.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteroidales_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json contains: - "one_line_summary" @@ -31,6 +59,8 @@ - "Complete BUSCOs" - "Missing BUSCOs" - "Dependencies and versions" + - path: output/busco/test-auto-busco.batch_summary.txt + md5sum: 32f7765c310f33555c31c7a3e64c990e - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt md5sum: e50690742e9ae6abdd2bf99334ff9e12 - path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt From 9fa6b6c1fc930830c1d819a9273b4ec12ffacb0a Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Fri, 6 May 2022 11:56:19 +0000 Subject: [PATCH 18/29] Update meta.yml --- modules/busco/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml index 19c2c991..ef8c5245 100644 --- a/modules/busco/meta.yml +++ b/modules/busco/meta.yml @@ -26,7 +26,7 @@ input: pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" - lineage: type: value - description: The BUSCO lineage to use. 
+ description: The BUSCO lineage to use, or "auto" to automatically select lineage - busco_lineages_path: type: directory description: Path to local BUSCO lineages directory. From 3930ba227bc49541e25272eb9332a10d01a1b271 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 6 May 2022 14:10:44 +0200 Subject: [PATCH 19/29] merge snapaligner modes into one --- modules/snapaligner/{paired => align}/main.nf | 7 +-- .../snapaligner/{paired => align}/meta.yml | 6 +-- modules/snapaligner/single/main.nf | 41 ---------------- modules/snapaligner/single/meta.yml | 48 ------------------- tests/modules/snapaligner/align/main.nf | 29 +++++++++++ .../{paired => align}/nextflow.config | 0 tests/modules/snapaligner/align/test.yml | 19 ++++++++ tests/modules/snapaligner/paired/main.nf | 17 ------- tests/modules/snapaligner/paired/test.yml | 9 ---- tests/modules/snapaligner/single/main.nf | 17 ------- .../snapaligner/single/nextflow.config | 5 -- tests/modules/snapaligner/single/test.yml | 9 ---- 12 files changed, 55 insertions(+), 152 deletions(-) rename modules/snapaligner/{paired => align}/main.nf (87%) rename modules/snapaligner/{paired => align}/meta.yml (86%) delete mode 100644 modules/snapaligner/single/main.nf delete mode 100644 modules/snapaligner/single/meta.yml create mode 100644 tests/modules/snapaligner/align/main.nf rename tests/modules/snapaligner/{paired => align}/nextflow.config (100%) create mode 100644 tests/modules/snapaligner/align/test.yml delete mode 100644 tests/modules/snapaligner/paired/main.nf delete mode 100644 tests/modules/snapaligner/paired/test.yml delete mode 100644 tests/modules/snapaligner/single/main.nf delete mode 100644 tests/modules/snapaligner/single/nextflow.config delete mode 100644 tests/modules/snapaligner/single/test.yml diff --git a/modules/snapaligner/paired/main.nf b/modules/snapaligner/align/main.nf similarity index 87% rename from modules/snapaligner/paired/main.nf rename to 
modules/snapaligner/align/main.nf index 57044893..ef7af561 100644 --- a/modules/snapaligner/paired/main.nf +++ b/modules/snapaligner/align/main.nf @@ -1,4 +1,4 @@ -process SNAPALIGNER_PAIRED { +process SNAPALIGNER_ALIGN { tag '$meta.id' label 'process_high' @@ -21,15 +21,16 @@ process SNAPALIGNER_PAIRED { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def subcmd = meta.single_end ? "single" : "paired" """ mkdir -p index mv $index index/ - snap-aligner paired \\ + snap-aligner ${subcmd} \\ index \\ ${reads.join(" ")} \\ - -o -bam ${prefix}.bam \\ + -o ${prefix}.bam \\ -t ${task.cpus} \\ $args diff --git a/modules/snapaligner/paired/meta.yml b/modules/snapaligner/align/meta.yml similarity index 86% rename from modules/snapaligner/paired/meta.yml rename to modules/snapaligner/align/meta.yml index b19e0174..611b0b71 100644 --- a/modules/snapaligner/paired/meta.yml +++ b/modules/snapaligner/align/meta.yml @@ -1,5 +1,5 @@ -name: "snapaligner_paired" -description: Performs paired end fastq alignment to a fasta reference using SNAP +name: "snapaligner_align" +description: Performs fastq alignment to a fasta reference using SNAP keywords: - alignment - map @@ -22,7 +22,7 @@ input: e.g. [ id:'test', single_end:false ] - reads: type: file - description: List of input fastq files of size 2 for fastq or 1 for bam + description: List of input fastq files of size 2 for paired fastq or 1 for bam or single fastq pattern: "*.{fastq.gz,fq.gz,fastq,fq,bam}" - index: type: file diff --git a/modules/snapaligner/single/main.nf b/modules/snapaligner/single/main.nf deleted file mode 100644 index b13e1153..00000000 --- a/modules/snapaligner/single/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process SNAPALIGNER_SINGLE { - tag '$meta.id' - label 'process_high' - - conda (params.enable_conda ? "bioconda::snap-aligner=2.0.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/snap-aligner:2.0.1--hd03093a_1': - 'quay.io/biocontainers/snap-aligner:2.0.1--hd03093a_1' }" - - input: - tuple val(meta), path(reads) - path index - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - mkdir -p index - mv $index index/ - - snap-aligner single \\ - index \\ - ${reads.join(" ")} \\ - -o -bam ${prefix}.bam \\ - -t ${task.cpus} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - snapaligner: \$(snap-aligner 2>&1| head -n 1 | sed 's/^.*version //;s/.\$//') - END_VERSIONS - """ -} diff --git a/modules/snapaligner/single/meta.yml b/modules/snapaligner/single/meta.yml deleted file mode 100644 index e69cc721..00000000 --- a/modules/snapaligner/single/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: "snapaligner_single" -description: Performs single end fastq alignment to a fasta reference using SNAP -keywords: - - alignment - - map - - fastq - - bam - - sam -tools: - - "snapaligner": - description: "Scalable Nucleotide Alignment Program -- a fast and accurate read aligner for high-throughput sequencing data" - homepage: "http://snap.cs.berkeley.edu" - documentation: "https://1drv.ms/b/s!AhuEg_0yZD86hcpblUt-muHKYsG8fA?e=R8ogug" - tool_dev_url: "https://github.com/amplab/snap" - doi: "10.1101/2021.11.23.469039" - licence: "['Apache v2']" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: file - description: List of single end input files - pattern: "*.{fastq.gz,fq.gz,fastq,fq,bam}" - - index: - type: file - description: List of SNAP genome index files - pattern: "{Genome,GenomeIndex,GenomeIndexHash,OverflowTable}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Aligned BAM file - pattern: "*.{bam}" - -authors: - - "@matthdsm" diff --git a/tests/modules/snapaligner/align/main.nf b/tests/modules/snapaligner/align/main.nf new file mode 100644 index 00000000..4f5943fe --- /dev/null +++ b/tests/modules/snapaligner/align/main.nf @@ -0,0 +1,29 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SNAPALIGNER_INDEX } from '../../../../modules/snapaligner/index/main.nf' +include { SNAPALIGNER_ALIGN as SNAPALIGNER_SINGLE } from '../../../../modules/snapaligner/align/main.nf' +include { SNAPALIGNER_ALIGN as SNAPALIGNER_PAIRED } from '../../../../modules/snapaligner/align/main.nf' + +workflow test_snapaligner_single { + + input = [ + [ id:'test', single_end:true ], // meta map + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + + SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) + SNAPALIGNER_SINGLE ( input, SNAPALIGNER_INDEX.out.index ) +} + +workflow test_snapaligner_paired { + + input = [ + [ id:'test', single_end:false ], // meta map + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)] + ] + + SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) + SNAPALIGNER_PAIRED ( input, 
SNAPALIGNER_INDEX.out.index ) +} diff --git a/tests/modules/snapaligner/paired/nextflow.config b/tests/modules/snapaligner/align/nextflow.config similarity index 100% rename from tests/modules/snapaligner/paired/nextflow.config rename to tests/modules/snapaligner/align/nextflow.config diff --git a/tests/modules/snapaligner/align/test.yml b/tests/modules/snapaligner/align/test.yml new file mode 100644 index 00000000..9cc7e230 --- /dev/null +++ b/tests/modules/snapaligner/align/test.yml @@ -0,0 +1,19 @@ +- name: snapaligner align test_snapaligner_single + command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_single -c tests/config/nextflow.config + tags: + - snapaligner/align + - snapaligner + files: + - path: output/snapaligner/test.bam + md5sum: 5d95594e4ef1ee23ce56e6a7cb64f0f2 + - path: output/snapaligner/versions.yml + +- name: snapaligner align test_snapaligner_paired + command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_paired -c tests/config/nextflow.config + tags: + - snapaligner/align + - snapaligner + files: + - path: output/snapaligner/test.bam + md5sum: a1405da5876f15dbe8a81516b94c2a15 + - path: output/snapaligner/versions.yml diff --git a/tests/modules/snapaligner/paired/main.nf b/tests/modules/snapaligner/paired/main.nf deleted file mode 100644 index b25ca8c2..00000000 --- a/tests/modules/snapaligner/paired/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { SNAPALIGNER_INDEX } from '../../../../modules/snapaligner/index/main.nf' -include { SNAPALIGNER_PAIRED } from '../../../../modules/snapaligner/paired/main.nf' - -workflow test_snapaligner_paired { - - input = [ - [ id:'test', single_end:false ], // meta map - [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)] - ] - - SNAPALIGNER_INDEX (
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) - SNAPALIGNER_PAIRED ( input, SNAPALIGNER_INDEX.out.index ) -} diff --git a/tests/modules/snapaligner/paired/test.yml b/tests/modules/snapaligner/paired/test.yml deleted file mode 100644 index 7df1e02b..00000000 --- a/tests/modules/snapaligner/paired/test.yml +++ /dev/null @@ -1,9 +0,0 @@ -- name: snapaligner paired test_snapaligner_paired - command: nextflow run tests/modules/snapaligner/paired -entry test_snapaligner_paired -c tests/config/nextflow.config - tags: - - snapaligner - - snapaligner/paired - files: - - path: output/snapaligner/test.bam - md5sum: 2ac92e9539fa246dd6db52b5de56fca5 - - path: output/snapaligner/versions.yml diff --git a/tests/modules/snapaligner/single/main.nf b/tests/modules/snapaligner/single/main.nf deleted file mode 100644 index 616e517a..00000000 --- a/tests/modules/snapaligner/single/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { SNAPALIGNER_INDEX } from '../../../../modules/snapaligner/index/main.nf' -include { SNAPALIGNER_SINGLE } from '../../../../modules/snapaligner/single/main.nf' - -workflow test_snapaligner_single { - - input = [ - [ id:'test', single_end:false ], // meta map - [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] - - SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) - SNAPALIGNER_SINGLE ( input, SNAPALIGNER_INDEX.out.index ) -} diff --git a/tests/modules/snapaligner/single/nextflow.config b/tests/modules/snapaligner/single/nextflow.config deleted file mode 100644 index 50f50a7a..00000000 --- a/tests/modules/snapaligner/single/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file diff --git 
a/tests/modules/snapaligner/single/test.yml b/tests/modules/snapaligner/single/test.yml deleted file mode 100644 index bbcbba1f..00000000 --- a/tests/modules/snapaligner/single/test.yml +++ /dev/null @@ -1,9 +0,0 @@ -- name: snapaligner single test_snapaligner_single - command: nextflow run tests/modules/snapaligner/single -entry test_snapaligner_single -c tests/config/nextflow.config - tags: - - snapaligner/single - - snapaligner - files: - - path: output/snapaligner/test.bam - md5sum: 696f7ea8e1aa5f9d7dafb9d0134fe25d - - path: output/snapaligner/versions.yml From 26d2231e854b909fcf0d2a4971c01a7030ae6cf2 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 6 May 2022 14:16:38 +0200 Subject: [PATCH 20/29] fix linting --- tests/config/pytest_modules.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index effe6fe2..2c9ea7d4 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1763,13 +1763,9 @@ snapaligner/index: - modules/snapaligner/index/** - tests/modules/snapaligner/index/** -snapaligner/paired: - - modules/snapaligner/paired/** - - tests/modules/snapaligner/paired/** - -snapaligner/single: - - modules/snapaligner/single/** - - tests/modules/snapaligner/single/** +snapaligner/align: + - modules/snapaligner/align/** + - tests/modules/snapaligner/align/** snpdists: - modules/snpdists/** From 7a85760d080d74ea8b883f9255a6d3861fd7d44b Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Fri, 6 May 2022 13:33:27 +0000 Subject: [PATCH 21/29] remove 'auto' test --- tests/modules/busco/main.nf | 48 +++++++++++++++++++++++++++++++++++- tests/modules/busco/test.yml | 30 ---------------------- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index e290b965..598e148a 100644 --- a/tests/modules/busco/main.nf +++ 
b/tests/modules/busco/main.nf @@ -13,12 +13,58 @@ workflow test_busco_genome_single_fasta { BUSCO ( input, - ['auto','bacteria_odb10', 'bacteroidetes_odb10'], // Launch with auto to use --auto-lineage, and specified lineages + ['bacteria_odb10', 'bacteroidetes_odb10'], // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues [], // Download busco lineage [], // No config ) /* Output tree: + /tmp/tmpyz_hi62i/busco/ + ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.json + ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.txt + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.json + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.txt + ├── test-bacteria_odb10-busco -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco/ + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco.batch_summary.txt + ├── test-bacteroidetes_odb10-busco -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco/ + │ ├── 
genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteroidetes_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/versions.yml + + Former Output tree -w 'auto': /tmp/tmp846crjv2/busco/ ├── short_summary.generic.bacteria_odb10.genome.fna.json -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.json ├── short_summary.generic.bacteria_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.txt diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index 2a801e64..43b810ba 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -3,20 +3,6 @@ tags: - busco files: - - path: output/busco/short_summary.generic.bacteria_odb10.genome.fna.json - contains: - - "one_line_summary" - - "input_file" - - "mode" - - "dataset" - - path: output/busco/short_summary.generic.bacteria_odb10.genome.fna.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json contains: - "one_line_summary" @@ -31,20 +17,6 @@ - "Complete BUSCOs" - "Missing BUSCOs" - "Dependencies and versions" - - path: output/busco/short_summary.specific.bacteroidales_odb10.genome.fna.json - contains: - - "one_line_summary" - - "input_file" - - 
"mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteroidales_odb10.genome.fna.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json contains: - "one_line_summary" @@ -59,8 +31,6 @@ - "Complete BUSCOs" - "Missing BUSCOs" - "Dependencies and versions" - - path: output/busco/test-auto-busco.batch_summary.txt - md5sum: 32f7765c310f33555c31c7a3e64c990e - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt md5sum: e50690742e9ae6abdd2bf99334ff9e12 - path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt From ed09978222ca8fba9b4f9a872b84d8a75acdfc5c Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 6 May 2022 14:16:15 +0200 Subject: [PATCH 22/29] chore: update build number --- modules/sratools/prefetch/main.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/sratools/prefetch/main.nf b/modules/sratools/prefetch/main.nf index 2233d0aa..75fa17a1 100644 --- a/modules/sratools/prefetch/main.nf +++ b/modules/sratools/prefetch/main.nf @@ -5,8 +5,8 @@ process SRATOOLS_PREFETCH { conda (params.enable_conda ? 'bioconda::sra-tools=2.11.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/sra-tools:2.11.0--pl5262h314213e_0' : - 'quay.io/biocontainers/sra-tools:2.11.0--pl5262h314213e_0' }" + 'https://depot.galaxyproject.org/singularity/sra-tools:2.11.0--pl5321ha49a11a_3' : + 'quay.io/biocontainers/sra-tools:2.11.0--pl5321ha49a11a_3' }" input: tuple val(meta), val(id) @@ -30,7 +30,6 @@ process SRATOOLS_PREFETCH { prefetch \\ $args \\ - --progress \\ $id vdb-validate $id From dd1c66783ae5fd2cd8b416687e465d9f1b558282 Mon Sep 17 00:00:00 2001 From: "Moritz E. 
Beber" Date: Fri, 6 May 2022 17:09:01 +0200 Subject: [PATCH 23/29] refactor: use a template to add retrying --- modules/sratools/prefetch/main.nf | 27 ++------- .../prefetch/templates/retry_with_backoff.sh | 59 +++++++++++++++++++ 2 files changed, 65 insertions(+), 21 deletions(-) create mode 100755 modules/sratools/prefetch/templates/retry_with_backoff.sh diff --git a/modules/sratools/prefetch/main.nf b/modules/sratools/prefetch/main.nf index 75fa17a1..92a66616 100644 --- a/modules/sratools/prefetch/main.nf +++ b/modules/sratools/prefetch/main.nf @@ -12,31 +12,16 @@ process SRATOOLS_PREFETCH { tuple val(meta), val(id) output: - tuple val(meta), path("$id"), emit: sra + tuple val(meta), path(id), emit: sra path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when - script: - def args = task.ext.args ?: '' - def config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" - """ - eval "\$(vdb-config -o n NCBI_SETTINGS | sed 's/[" ]//g')" - if [[ ! 
-f "\${NCBI_SETTINGS}" ]]; then - mkdir -p "\$(dirname "\${NCBI_SETTINGS}")" - printf '${config}' > "\${NCBI_SETTINGS}" - fi + shell: + args = task.ext.args ?: '' + args2 = task.ext.args2 ?: '5 1 100' // <max attempts> <initial delay in seconds> <max delay in seconds> + config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" - prefetch \\ - $args \\ - $id - - vdb-validate $id - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sratools: \$(prefetch --version 2>&1 | grep -Eo '[0-9.]+') - END_VERSIONS - """ + template 'retry_with_backoff.sh' } diff --git a/modules/sratools/prefetch/templates/retry_with_backoff.sh b/modules/sratools/prefetch/templates/retry_with_backoff.sh new file mode 100755 index 00000000..fbcb6532 --- /dev/null +++ b/modules/sratools/prefetch/templates/retry_with_backoff.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +set -u + +retry_with_backoff() { + local max_attempts=${1} + local delay=${2} + local max_time=${3} + local attempt=1 + local output= + local status= + + # Remove the first three arguments to this function in order to access + # the 'real' command with `${@}`. + shift 3 + + while [ ${attempt} -le ${max_attempts} ]; do + output=$("${@}") + status=${?} + + if [ ${status} -eq 0 ]; then + break + fi + + if [ ${attempt} -lt ${max_attempts} ]; then + echo "Failed attempt ${attempt} of ${max_attempts}. Retrying in ${delay} s." >&2 + sleep ${delay} + elif [ ${attempt} -eq ${max_attempts} ]; then + echo "Failed after ${attempt} attempts." >&2 + return ${status} + fi + + attempt=$(( ${attempt} + 1 )) + delay=$(( ${delay} * 2 )) + if [ ${delay} -ge ${max_time} ]; then + delay=${max_time} + fi + done + + echo "${output}" +} + +eval "$(vdb-config -o n NCBI_SETTINGS | sed 's/[" ]//g')" +if [[ !
-f "${NCBI_SETTINGS}" ]]; then + mkdir -p "$(dirname "${NCBI_SETTINGS}")" + printf '!{config}' > "${NCBI_SETTINGS}" +fi + +retry_with_backoff !{args2} \ + prefetch \ + !{args} \ + !{id} + +vdb-validate !{id} + +cat <<-END_VERSIONS > versions.yml +"!{task.process}": + sratools: $(prefetch --version 2>&1 | grep -Eo '[0-9.]+') +END_VERSIONS From ea41c753c0d8348260dd552f58dc0aca7bdd4d51 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 6 May 2022 17:28:56 +0200 Subject: [PATCH 24/29] fix: remove retry label Call gets retried by bash script --- modules/sratools/prefetch/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/sratools/prefetch/main.nf b/modules/sratools/prefetch/main.nf index 92a66616..3408c3e9 100644 --- a/modules/sratools/prefetch/main.nf +++ b/modules/sratools/prefetch/main.nf @@ -1,7 +1,6 @@ process SRATOOLS_PREFETCH { tag "$id" label 'process_low' - label 'error_retry' conda (params.enable_conda ? 'bioconda::sra-tools=2.11.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? From ec7b460fa4a5ed50d7b0e4c73f984a9710bb703d Mon Sep 17 00:00:00 2001 From: "Maxime U. 
Garcia" Date: Fri, 6 May 2022 17:54:35 +0200 Subject: [PATCH 25/29] add missing file --- tests/config/test_data.config | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/test_data.config b/tests/config/test_data.config index e9a5f4ab..516e6238 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -125,6 +125,7 @@ params { genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" genome_interval_list = "${test_data_dir}/genomics/homo_sapiens/genome/genome.interval_list" + genome_multi_interval_bed = "${test_data_dir}/genomics/homo_sapiens/genome/multi_intervals.bed" genome_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes" genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed" genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header" From ac217479e9cb4579060a6631405619f6dc8e6a92 Mon Sep 17 00:00:00 2001 From: "Maxime U. 
Garcia" Date: Fri, 6 May 2022 18:18:32 +0200 Subject: [PATCH 26/29] Update tests/config/test_data.config Co-authored-by: Jose Espinosa-Carrasco --- tests/config/test_data.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 516e6238..6fd65ab7 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -125,7 +125,7 @@ params { genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" genome_interval_list = "${test_data_dir}/genomics/homo_sapiens/genome/genome.interval_list" - genome_multi_interval_bed = "${test_data_dir}/genomics/homo_sapiens/genome/multi_intervals.bed" + genome_multi_interval_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.multi_intervals.bed" genome_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes" genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed" genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header" From dd458e3388d50212e447e85794f1b027abda5c93 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 6 May 2022 18:57:17 +0200 Subject: [PATCH 27/29] fix tags --- tests/modules/snapaligner/align/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/snapaligner/align/test.yml b/tests/modules/snapaligner/align/test.yml index 9cc7e230..4e4fae61 100644 --- a/tests/modules/snapaligner/align/test.yml +++ b/tests/modules/snapaligner/align/test.yml @@ -1,7 +1,7 @@ - name: snapaligner align test_snapaligner_single command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_single -c tests/config/nextflow.config tags: - - snapaligner/single + - snapaligner/align - snapaligner files: - path: output/snapaligner/test.bam @@ -11,7 +11,7 @@ - name: snapaligner align test_snapaligner_paired 
command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_paired -c tests/config/nextflow.config tags: - - snapaligner/paired + - snapaligner/align - snapaligner files: - path: output/snapaligner/test.bam From 1013be2623c074dc3b0e6ed7a37185188c5305f0 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Sat, 7 May 2022 15:22:52 +0200 Subject: [PATCH 28/29] fix path to script --- tests/subworkflows/nf-core/annotation/snpeff/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/subworkflows/nf-core/annotation/snpeff/main.nf b/tests/subworkflows/nf-core/annotation/snpeff/main.nf index 4aee20ee..bb56b46b 100644 --- a/tests/subworkflows/nf-core/annotation/snpeff/main.nf +++ b/tests/subworkflows/nf-core/annotation/snpeff/main.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl = 2 -include { ANNOTATION_SNPEFF } from '../../../../../subworkflows/nf-core/annotation_snpeff/main' +include { ANNOTATION_SNPEFF } from '../../../../../subworkflows/nf-core/annotation/snpeff/main' workflow annotation_snpeff { input = [ From 3d428e29332f1b332b7ffa1f36f8838a6fd90f0b Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Sat, 7 May 2022 15:28:40 +0200 Subject: [PATCH 29/29] fix spacing --- tests/config/test_data.config | 254 +++++++++++++++++----------------- 1 file changed, 127 insertions(+), 127 deletions(-) diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 6fd65ab7..4ff6efd9 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -125,7 +125,7 @@ params { genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" genome_interval_list = "${test_data_dir}/genomics/homo_sapiens/genome/genome.interval_list" - genome_multi_interval_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.multi_intervals.bed" + genome_multi_interval_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.multi_intervals.bed" genome_sizes 
= "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes" genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed" genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header" @@ -182,16 +182,16 @@ params { vcfanno_toml = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml" } 'pangenome' { - pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa" - pangenome_fa_gz = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa.gz" - pangenome_paf = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.paf" - pangenome_paf_gz = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.paf.gz" - pangenome_seqwish_gfa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.seqwish.gfa" - pangenome_smoothxg_gfa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.smoothxg.gfa" - pangenome_gfaffix_gfa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.gfaffix.gfa" + pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa" + pangenome_fa_gz = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa.gz" + pangenome_paf = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.paf" + pangenome_paf_gz = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.paf.gz" + pangenome_seqwish_gfa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.seqwish.gfa" + pangenome_smoothxg_gfa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.smoothxg.gfa" + pangenome_gfaffix_gfa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.gfaffix.gfa" 'odgi' { - pangenome_og = "${test_data_dir}/pangenomics/homo_sapiens/odgi/pangenome.og" - pangenome_lay = "${test_data_dir}/pangenomics/homo_sapiens/odgi/pangenome.lay" + pangenome_og = "${test_data_dir}/pangenomics/homo_sapiens/odgi/pangenome.og" + pangenome_lay = "${test_data_dir}/pangenomics/homo_sapiens/odgi/pangenome.lay" } } 'illumina' { @@ -212,131 +212,131 @@ params { test_paired_end_hla = 
"${test_data_dir}/genomics/homo_sapiens/illumina/bam/example_hla_pe.bam" test_paired_end_hla_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam" test_paired_end_hla_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam.bai" - test2_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam" - test2_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai" - test2_paired_end_name_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam" - test2_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam" - test2_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai" - test2_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam" - test2_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai" - test2_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_consensus.bam" - test2_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_converted.bam" - test2_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_grouped.bam" - test2_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_histogram.txt" - test2_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam" - test2_paired_end_umi_unsorted_tagged_bam = 
"${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam" + test2_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam" + test2_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai" + test2_paired_end_name_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam" + test2_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam" + test2_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai" + test2_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam" + test2_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai" + test2_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_consensus.bam" + test2_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_converted.bam" + test2_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_grouped.bam" + test2_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_histogram.txt" + test2_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam" + test2_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam" - mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" - 
mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" + mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" + mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" - test_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram" - test_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai" - test_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram" - test_paired_end_markduplicates_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai" - test_paired_end_recalibrated_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram" - test_paired_end_recalibrated_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai" + test_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram" + test_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai" + test_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram" + test_paired_end_markduplicates_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai" + test_paired_end_recalibrated_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram" + test_paired_end_recalibrated_sorted_cram_crai = 
"${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai" - test2_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram" - test2_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai" - test2_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram" - test2_paired_end_markduplicates_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai" - test2_paired_end_recalibrated_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram" - test2_paired_end_recalibrated_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai" + test2_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram" + test2_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai" + test2_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram" + test2_paired_end_markduplicates_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai" + test2_paired_end_recalibrated_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram" + test2_paired_end_recalibrated_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai" - test_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz" - test_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" - test_umi_1_fastq_gz = 
"${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz" - test_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz" - test2_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz" - test2_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz" - test2_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz" - test2_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz" - test_rnaseq_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz" - test_rnaseq_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz" + test_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + test_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz" + test_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz" + test2_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz" + test2_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz" + test2_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz" + test_rnaseq_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz" + test_rnaseq_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz" - test_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table" - test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" 
- test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" - test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + test_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table" + test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" + test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" + test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" - test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" - test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" + test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" + test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" - test2_haplotc_ann_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" - test2_haplotc_ann_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" + test2_haplotc_ann_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" + test2_haplotc_ann_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" - test2_recal = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal" - test2_recal_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx" - test2_tranches = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches" - test2_allele_specific_recal = 
"${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal" - test2_allele_specific_recal_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal.idx" - test2_allele_specific_tranches = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.tranches" + test2_recal = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal" + test2_recal_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx" + test2_tranches = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches" + test2_allele_specific_recal = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal" + test2_allele_specific_recal_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal.idx" + test2_allele_specific_tranches = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.tranches" - test_test2_paired_mutect2_calls_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz" - test_test2_paired_mutect2_calls_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi" - test_test2_paired_mutect2_calls_vcf_gz_stats = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.stats" - test_test2_paired_mutect2_calls_f1r2_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.f1r2.tar.gz" - test_test2_paired_mutect2_calls_artifact_prior_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_test2_paired_mutect2_calls.artifact-prior.tar.gz" - test_test2_paired_segmentation_table = 
"${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_test2_paired.segmentation.table" - test_test2_paired_contamination_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_test2_paired.contamination.table" + test_test2_paired_mutect2_calls_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz" + test_test2_paired_mutect2_calls_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi" + test_test2_paired_mutect2_calls_vcf_gz_stats = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.stats" + test_test2_paired_mutect2_calls_f1r2_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.f1r2.tar.gz" + test_test2_paired_mutect2_calls_artifact_prior_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_test2_paired_mutect2_calls.artifact-prior.tar.gz" + test_test2_paired_segmentation_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_test2_paired.segmentation.table" + test_test2_paired_contamination_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_test2_paired.contamination.table" - test_genome_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf" - test_genome_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz" - test_genome_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi" - test_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx" + test_genome_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf" + test_genome_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz" + test_genome_vcf_gz_tbi = 
"${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi" + test_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx" - test2_genome_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf" - test2_genome_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz" - test2_genome_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi" - test2_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx" + test2_genome_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf" + test2_genome_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz" + test2_genome_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi" + test2_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx" - test_genome21_indels_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz" - test_genome21_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz.tbi" + test_genome21_indels_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz" + test_genome21_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz.tbi" - test_mpileup = "${test_data_dir}/genomics/homo_sapiens/illumina/mpileup/test.mpileup.gz" - test2_mpileup = "${test_data_dir}/genomics/homo_sapiens/illumina/mpileup/test2.mpileup.gz" + test_mpileup = "${test_data_dir}/genomics/homo_sapiens/illumina/mpileup/test.mpileup.gz" + test2_mpileup = "${test_data_dir}/genomics/homo_sapiens/illumina/mpileup/test2.mpileup.gz" - test_broadpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak" - test2_broadpeak = 
"${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak" + test_broadpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak" + test2_broadpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak" - test_narrowpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak" - test2_narrowpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak" + test_narrowpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak" + test2_narrowpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak" - test_10x_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test_10x_S1_L001_R1_001.fastq.gz" - test_10x_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test_10x_S1_L001_R2_001.fastq.gz" + test_10x_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test_10x_S1_L001_R1_001.fastq.gz" + test_10x_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test_10x_S1_L001_R2_001.fastq.gz" - test_yak = "${test_data_dir}/genomics/homo_sapiens/illumina/yak/test.yak" - test2_yak = "${test_data_dir}/genomics/homo_sapiens/illumina/yak/test2.yak" + test_yak = "${test_data_dir}/genomics/homo_sapiens/illumina/yak/test.yak" + test2_yak = "${test_data_dir}/genomics/homo_sapiens/illumina/yak/test2.yak" - cutandrun_bedgraph_test_1 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph" - cutandrun_bedgraph_test_2 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph" + cutandrun_bedgraph_test_1 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph" + cutandrun_bedgraph_test_2 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph" - test_rnaseq_vcf = 
"${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf" - test_sv_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" + test_rnaseq_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf" + test_sv_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" - test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor" + test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor" } 'pacbio' { - primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" - alz = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.bam" - alzpbi = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.bam.pbi" - ccs = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.bam" - ccs_fa = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta" - ccs_fa_gz = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta.gz" - ccs_fq = "${test_data_dir}/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq" - ccs_fq_gz = "${test_data_dir}/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz" - ccs_xml = "${test_data_dir}/genomics/homo_sapiens/pacbio/xml/alz.ccs.consensusreadset.xml" - hifi = "${test_data_dir}/genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz" - lima = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.bam" - refine = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.bam" - cluster = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.bam" - singletons = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.bam" - aligned = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam" - alignedbai = 
"${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam.bai" - genemodel1 = "${test_data_dir}/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed" - genemodel2 = "${test_data_dir}/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed" - filelist = "${test_data_dir}/genomics/homo_sapiens/pacbio/txt/filelist.txt" + primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" + alz = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.bam" + alzpbi = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.bam.pbi" + ccs = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.bam" + ccs_fa = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta" + ccs_fa_gz = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta.gz" + ccs_fq = "${test_data_dir}/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq" + ccs_fq_gz = "${test_data_dir}/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz" + ccs_xml = "${test_data_dir}/genomics/homo_sapiens/pacbio/xml/alz.ccs.consensusreadset.xml" + hifi = "${test_data_dir}/genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz" + lima = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.bam" + refine = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.bam" + cluster = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.bam" + singletons = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.bam" + aligned = "${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam" + alignedbai = 
"${test_data_dir}/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam.bai" + genemodel1 = "${test_data_dir}/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed" + genemodel2 = "${test_data_dir}/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed" + filelist = "${test_data_dir}/genomics/homo_sapiens/pacbio/txt/filelist.txt" } } 'bacteroides_fragilis' { @@ -394,31 +394,31 @@ params { } 'generic' { 'csv' { - test_csv = "${test_data_dir}/generic/csv/test.csv" + test_csv = "${test_data_dir}/generic/csv/test.csv" } 'notebooks' { - rmarkdown = "${test_data_dir}/generic/notebooks/rmarkdown/rmarkdown_notebook.Rmd" - ipython_md = "${test_data_dir}/generic/notebooks/jupyter/ipython_notebook.md" - ipython_ipynb = "${test_data_dir}/generic/notebooks/jupyter/ipython_notebook.ipynb" + rmarkdown = "${test_data_dir}/generic/notebooks/rmarkdown/rmarkdown_notebook.Rmd" + ipython_md = "${test_data_dir}/generic/notebooks/jupyter/ipython_notebook.md" + ipython_ipynb = "${test_data_dir}/generic/notebooks/jupyter/ipython_notebook.ipynb" } 'tsv' { - test_tsv = "${test_data_dir}/generic/tsv/test.tsv" + test_tsv = "${test_data_dir}/generic/tsv/test.tsv" } 'txt' { - hello = "${test_data_dir}/generic/txt/hello.txt" + hello = "${test_data_dir}/generic/txt/hello.txt" } 'cnn' { - reference = "${test_data_dir}/generic/cnn/reference.cnn" + reference = "${test_data_dir}/generic/cnn/reference.cnn" } 'cooler'{ - test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" - test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" - test_pairs_pair = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.sample1.pairs" - test_tabix_pair_gz = 
"${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz" - test_tabix_pair_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz.tbi" - hg19_chrom_sizes = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes" - test_merge_cool = "${test_data_dir}/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cool" - test_merge_cool_cp2 = "${test_data_dir}/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cp2.cool" + test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" + test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" + test_pairs_pair = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.sample1.pairs" + test_tabix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz" + test_tabix_pair_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz.tbi" + hg19_chrom_sizes = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes" + test_merge_cool = "${test_data_dir}/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cool" + test_merge_cool_cp2 = "${test_data_dir}/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cp2.cool" } }