diff --git a/modules/busco/main.nf b/modules/busco/main.nf new file mode 100644 index 00000000..f0713862 --- /dev/null +++ b/modules/busco/main.nf @@ -0,0 +1,84 @@ +process BUSCO { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::busco=5.3.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/busco:5.3.2--pyhdfd78af_0': + 'quay.io/biocontainers/busco:5.3.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path('tmp_input/*') + each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead + path busco_lineages_path // Recommended: path to busco lineages - downloads if not set + path config_file // Optional: busco configuration file + + output: + tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true + tuple val(meta), path("*-busco") , emit: busco_dir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def busco_config = config_file ? "--config $config_file" : '' + def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}" + def busco_lineage_dir = busco_lineages_path ? "--offline --download_path ${busco_lineages_path}" : '' + """ + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) + # Check for container variable initialisation script and source it. + if [ -f "/usr/local/env-activate.sh" ]; then + set +u # Otherwise, errors out because of various unbound variables + . "/usr/local/env-activate.sh" + set -u + fi + + # If the augustus config directory is not writable, then copy to writeable area + if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then + # Create writable tmp directory for augustus + AUG_CONF_DIR=\$( mktemp -d -p \$PWD ) + cp -r \$AUGUSTUS_CONFIG_PATH/* \$AUG_CONF_DIR + export AUGUSTUS_CONFIG_PATH=\$AUG_CONF_DIR + echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}" + fi + + # Ensure the input is uncompressed + INPUT_SEQS=input_seqs + mkdir "\$INPUT_SEQS" + cd "\$INPUT_SEQS" + for FASTA in ../tmp_input/*; do + if [ "\${FASTA##*.}" == 'gz' ]; then + gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .gz ) + else + ln -s "\$FASTA" . + fi + done + cd .. + + busco \\ + --cpu $task.cpus \\ + --in "\$INPUT_SEQS" \\ + --out ${prefix}-busco \\ + $busco_lineage \\ + $busco_lineage_dir \\ + $busco_config \\ + $args + + # clean up + rm -rf "\$INPUT_SEQS" + + # Move files to avoid staging/publishing issues + mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt + mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found." + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ +} diff --git a/modules/busco/meta.yml b/modules/busco/meta.yml new file mode 100644 index 00000000..ef8c5245 --- /dev/null +++ b/modules/busco/meta.yml @@ -0,0 +1,69 @@ +name: busco +description: Benchmarking Universal Single Copy Orthologs +keywords: + - quality control + - genome + - transcriptome + - proteome +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleic or amino acid sequence file in FASTA format. + pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" + - lineage: + type: value + description: The BUSCO lineage to use, or "auto" to automatically select lineage + - busco_lineages_path: + type: directory + description: Path to local BUSCO lineages directory. + - config_file: + type: file + description: Path to BUSCO config file. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - batch_summary: + type: file + description: Summary of all sequence files analyzed + pattern: "*-busco.batch_summary.txt" + - short_summaries_txt: + type: file + description: Short Busco summary in plain text format + pattern: "short_summary.*.txt" + - short_summaries_json: + type: file + description: Short Busco summary in JSON format + pattern: "short_summary.*.json" + - busco_dir: + type: directory + description: BUSCO lineage specific output + pattern: "*-busco" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index effe6fe2..62816c50 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -337,6 +337,10 @@ bracken/bracken: - modules/bracken/bracken/** - tests/modules/bracken/bracken/** +busco: + - modules/busco/** + - tests/modules/busco/** + bwa/aln: - modules/bwa/aln/** - tests/modules/bwa/aln/** diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf new file mode 100644 index 00000000..598e148a --- /dev/null +++ b/tests/modules/busco/main.nf @@ -0,0 +1,404 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BUSCO } from '../../../modules/busco/main.nf' + +workflow test_busco_genome_single_fasta { + + input = [ + [ id:'test' ], // meta map + file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + BUSCO ( + input, + ['bacteria_odb10', 'bacteroidetes_odb10'], // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmpyz_hi62i/busco/ + ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.json + ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.txt + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.json + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.txt + ├── test-bacteria_odb10-busco -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco/ + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco.batch_summary.txt + ├── test-bacteroidetes_odb10-busco -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco/ + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteroidetes_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/versions.yml + + Former Output tree -w 'auto': + /tmp/tmp846crjv2/busco/ + ├── short_summary.generic.bacteria_odb10.genome.fna.json -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.json + ├── short_summary.generic.bacteria_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.txt + ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/short_summary.specific.bacteria_odb10.genome.fna.json + ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/short_summary.specific.bacteria_odb10.genome.fna.txt + ├── short_summary.specific.bacteroidales_odb10.genome.fna.json -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.specific.bacteroidales_odb10.genome.fna.json + ├── short_summary.specific.bacteroidales_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.specific.bacteroidales_odb10.genome.fna.txt + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/short_summary.specific.bacteroidetes_odb10.genome.fna.json + ├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/short_summary.specific.bacteroidetes_odb10.genome.fna.txt + ├── test-auto-busco -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco/ + │ ├── genome.fna/ + │ │ ├── auto_lineage/ + │ │ │ ├── run_archaea_odb10/ + │ │ │ ├── run_bacteria_odb10/ + │ │ │ └── run_eukaryota_odb10/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── metaeuk_err.log + │ │ │ ├── metaeuk_out.log + │ │ │ ├── prodigal_err.log + │ │ │ ├── prodigal_out.log + │ │ │ ├── sepp_err.log + │ │ │ └── sepp_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ ├── run_bacteria_odb10 -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco/genome.fna/auto_lineage/run_bacteria_odb10/ [recursive, not followed] + │ │ └── run_bacteroidales_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-auto-busco.batch_summary.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco.batch_summary.txt + ├── test-bacteria_odb10-busco -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/test-bacteria_odb10-busco/ + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/test-bacteria_odb10-busco.batch_summary.txt + ├── test-bacteroidetes_odb10-busco -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/test-bacteroidetes_odb10-busco/ + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteroidetes_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/test-bacteroidetes_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/versions.yml + */ + +} + +workflow test_busco_genome_multi_fasta { + + input = [ + [ id:'test' ], // meta map + [ + file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true), + file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + + BUSCO ( + input, + 'bacteria_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmpk19byek7/busco/ + ├── short_summary.specific.bacteria_odb10.genome.fasta.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.json + ├── short_summary.specific.bacteria_odb10.genome.fasta.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.txt + ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.json + ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.txt + ├── test-bacteria_odb10-busco -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco/ + │ ├── genome.fasta/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ ├── genome.fna/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── prodigal_err.log + │ │ │ └── prodigal_out.log + │ │ ├── prodigal_output/ + │ │ │ └── predicted_genes/ + │ │ └── run_bacteria_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/versions.yml + */ + +} + +workflow test_busco_eukaryote_metaeuk { + + input = [ + [ id:'test' ], // meta map + file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + + BUSCO ( + input, + 'eukaryota_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmpeq4dsir5/busco/ + ├── short_summary.specific.eukaryota_odb10.genome.fasta.json -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.json + ├── short_summary.specific.eukaryota_odb10.genome.fasta.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.txt + ├── test-eukaryota_odb10-busco -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco/ + │ ├── genome.fasta/ + │ │ ├── logs/ + │ │ │ ├── hmmsearch_err.log + │ │ │ ├── hmmsearch_out.log + │ │ │ ├── metaeuk_err.log + │ │ │ └── metaeuk_out.log + │ │ └── run_eukaryota_odb10/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── metaeuk_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ └── short_summary.txt + │ └── logs/ + │ └── busco.log + ├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/versions.yml + */ + +} + +workflow test_busco_eukaryote_augustus { + + input = [ + [ id:'test' ], // meta map + file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + + BUSCO ( + input, + 'eukaryota_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmp2xqaygjj/busco/ + ├── test-eukaryota_odb10-busco -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco/ + │ ├── genome.fasta/ + │ │ ├── blast_db/ + │ │ │ ├── genome.fasta.ndb + │ │ │ ├── genome.fasta.nhr + │ │ │ ├── genome.fasta.nin + │ │ │ ├── genome.fasta.not + │ │ │ ├── genome.fasta.nsq + │ │ │ ├── genome.fasta.ntf + │ │ │ └── genome.fasta.nto + │ │ ├── logs/ + │ │ │ ├── makeblastdb_err.log + │ │ │ ├── makeblastdb_out.log + │ │ │ ├── tblastn_err.log + │ │ │ └── tblastn_out.log + │ │ └── run_eukaryota_odb10/ + │ │ ├── augustus_output/ + │ │ ├── blast_output/ + │ │ ├── busco_sequences/ + │ │ └── hmmer_output/ + │ └── logs/ + │ └── busco.log + ├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/versions.yml + */ + +} + +workflow test_busco_protein { + + input = [ + [ id:'test' ], // meta map + file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true) + ] + + BUSCO ( + input, + 'bacteria_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmpzwd5dn56/busco/ + ├── short_summary.specific.bacteria_odb10.proteome.fasta.json -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.json + ├── short_summary.specific.bacteria_odb10.proteome.fasta.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.txt + ├── test-bacteria_odb10-busco -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco/ + │ ├── logs/ + │ │ └── busco.log + │ └── proteome.fasta/ + │ ├── logs/ + │ │ ├── hmmsearch_err.log + │ │ └── hmmsearch_out.log + │ └── run_bacteria_odb10/ + │ ├── busco_sequences/ + │ ├── full_table.tsv + │ ├── hmmer_output/ + │ ├── missing_busco_list.tsv + │ ├── short_summary.json + │ └── short_summary.txt + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/versions.yml + */ +} +workflow test_busco_transcriptome { + + input = [ + [ id:'test' ], // meta map + file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true) + ] + + BUSCO ( + input, + 'bacteria_odb10', + [], // Download busco lineage + [], // No config + ) + + /* Output tree: + /tmp/tmpitjyvo9g/busco/ + ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.json -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + ├── test-bacteria_odb10-busco -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco/ + │ ├── logs/ + │ │ └── busco.log + │ └── test1.contigs.fa/ + │ ├── blast_db/ + │ │ ├── test1.contigs.fa.ndb + │ │ ├── test1.contigs.fa.nhr + │ │ ├── test1.contigs.fa.nin + │ │ ├── test1.contigs.fa.not + │ │ ├── test1.contigs.fa.nsq + │ │ ├── test1.contigs.fa.ntf + │ │ └── test1.contigs.fa.nto + │ ├── logs/ + │ │ ├── hmmsearch_err.log + │ │ ├── hmmsearch_out.log + │ │ ├── makeblastdb_err.log + │ │ ├── makeblastdb_out.log + │ │ ├── tblastn_err.log + │ │ └── tblastn_out.log + │ ├── run_bacteria_odb10/ + │ │ ├── blast_output/ + │ │ ├── busco_sequences/ + │ │ ├── full_table.tsv + │ │ ├── hmmer_output/ + │ │ ├── missing_busco_list.tsv + │ │ ├── short_summary.json + │ │ ├── short_summary.txt + │ │ └── single_copy_proteins.faa + │ └── translated_proteins/ + │ ├── 1024388at2.faa + │ ├── 1054741at2.faa + │ ├── 1093223at2.faa + │ ├── 1151822at2.faa + │ ├── 143460at2.faa + │ ├── 1491686at2.faa + │ ├── 1504821at2.faa + │ ├── 1574817at2.faa + │ ├── 1592033at2.faa + │ ├── 1623045at2.faa + │ ├── 1661836at2.faa + │ ├── 1674344at2.faa + │ ├── 1698718at2.faa + │ ├── 1990650at2.faa + │ ├── 223233at2.faa + │ ├── 402899at2.faa + │ ├── 505485at2.faa + │ ├── 665824at2.faa + │ ├── 776861at2.faa + │ ├── 874197at2.faa + │ ├── 932854at2.faa + │ └── 95696at2.faa + ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco.batch_summary.txt + └── versions.yml -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/versions.yml + */ + +} diff --git a/tests/modules/busco/nextflow.config b/tests/modules/busco/nextflow.config new file mode 100644 index 00000000..9e8f718f --- /dev/null +++ b/tests/modules/busco/nextflow.config @@ -0,0 +1,28 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'test_busco_genome_single_fasta:BUSCO' { + ext.args = '--mode genome' + } + + withName: 'test_busco_genome_multi_fasta:BUSCO' { + ext.args = '--mode genome' + } + + withName: 'test_busco_eukaryote_metaeuk:BUSCO' { + ext.args = '--mode genome' + } + + withName: 'test_busco_eukaryote_augustus:BUSCO' { + ext.args = '--mode genome --augustus' + } + + withName: 'test_busco_protein:BUSCO' { + ext.args = '--mode proteins' + } + + withName: 'test_busco_transcriptome:BUSCO'{ + ext.args = '--mode transcriptome' + } +} diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml new file mode 100644 index 00000000..43b810ba --- /dev/null +++ b/tests/modules/busco/test.yml @@ -0,0 +1,159 @@ +- name: busco test_busco_genome_single_fasta + command: nextflow run tests/modules/busco -entry test_busco_genome_single_fasta -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: e50690742e9ae6abdd2bf99334ff9e12 + - path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt + md5sum: 4c1b2c4317c88398eddc30877ed740d9 + - path: output/busco/versions.yml + md5sum: 8aa830f71587d859df35c6cfab59f35d + +- name: busco test_busco_genome_multi_fasta + command: nextflow run tests/modules/busco -entry test_busco_genome_multi_fasta -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 5360dfe83bec1f5741ee115e53e6b517 + - path: output/busco/versions.yml + md5sum: 9a959eb0a1f765777dff1ea2f5c139c0 + +- name: busco test_busco_eukaryote_metaeuk + command: nextflow run tests/modules/busco -entry test_busco_eukaryote_metaeuk -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: a70806f99ba5706d7353d3353b3f1d2b + - path: output/busco/versions.yml + md5sum: 34a808c257e6db1b0456f3b4372bc477 + +- name: busco test_busco_eukaryote_augustus + command: nextflow run tests/modules/busco -entry test_busco_eukaryote_augustus -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: 660393dd43cd6a093b952d4b8ad41e40 + - path: output/busco/versions.yml + md5sum: 2caac915461410b16a1524ac064cd0df + +- name: busco test_busco_protein + command: nextflow run tests/modules/busco -entry test_busco_protein -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: fd3b4e30ce74d1fcb95d6286d6e2049f + - path: output/busco/versions.yml + md5sum: d7392261a57960a7e6aea609dce824f5 + +- name: busco test_busco_transcriptome + command: nextflow run tests/modules/busco -entry test_busco_transcriptome -c tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + contains: + - "one_line_summary" + - "input_file" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 9a176cafe66ac0adca89dc34ad2be13f + - path: output/busco/versions.yml + md5sum: 30eacbc7df70f6b1e72e0a7b6d02a7e1