Update busco input type (#2135)

* Update busco input type

* Fix singularity test
This commit is contained in:
Mahesh Binzer-Panchal 2022-09-29 22:35:51 +02:00 committed by GitHub
parent 96602d03fa
commit ebc466caac
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 3 additions and 302 deletions

View file

@ -9,7 +9,7 @@ process BUSCO {
input:
tuple val(meta), path('tmp_input/*')
each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead
val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead
path busco_lineages_path // Recommended: path to busco lineages - downloads if not set
path config_file // Optional: busco configuration file

View file

@ -13,58 +13,11 @@ workflow test_busco_genome_single_fasta {
BUSCO (
input,
['bacteria_odb10', 'bacteroidetes_odb10'], // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues
'bacteria_odb10', // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues
[], // Download busco lineage
[] // No config
)
/* Output tree:
/tmp/tmpyz_hi62i/busco/
├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.json
├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.txt
├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.json
├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.txt
├── test-bacteria_odb10-busco -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco/
│ ├── genome.fna/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteria_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco.batch_summary.txt
├── test-bacteroidetes_odb10-busco -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco/
│ ├── genome.fna/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteroidetes_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/versions.yml
*/
}
workflow test_busco_genome_multi_fasta {
@ -84,49 +37,6 @@ workflow test_busco_genome_multi_fasta {
[] // No config
)
/* Output tree:
/tmp/tmpk19byek7/busco/
├── short_summary.specific.bacteria_odb10.genome.fasta.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.json
├── short_summary.specific.bacteria_odb10.genome.fasta.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.txt
├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.json
├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.txt
├── test-bacteria_odb10-busco -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco/
│ ├── genome.fasta/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteria_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ ├── genome.fna/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteria_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/versions.yml
*/
}
workflow test_busco_eukaryote_metaeuk {
@ -143,31 +53,6 @@ workflow test_busco_eukaryote_metaeuk {
[] // No config
)
/* Output tree:
/tmp/tmpeq4dsir5/busco/
├── short_summary.specific.eukaryota_odb10.genome.fasta.json -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.json
├── short_summary.specific.eukaryota_odb10.genome.fasta.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.txt
├── test-eukaryota_odb10-busco -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco/
│ ├── genome.fasta/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── metaeuk_err.log
│ │ │ └── metaeuk_out.log
│ │ └── run_eukaryota_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── metaeuk_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/versions.yml
*/
}
workflow test_busco_eukaryote_augustus {
@ -184,34 +69,6 @@ workflow test_busco_eukaryote_augustus {
[] // No config
)
/* Output tree:
/tmp/tmp2xqaygjj/busco/
├── test-eukaryota_odb10-busco -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco/
│ ├── genome.fasta/
│ │ ├── blast_db/
│ │ │ ├── genome.fasta.ndb
│ │ │ ├── genome.fasta.nhr
│ │ │ ├── genome.fasta.nin
│ │ │ ├── genome.fasta.not
│ │ │ ├── genome.fasta.nsq
│ │ │ ├── genome.fasta.ntf
│ │ │ └── genome.fasta.nto
│ │ ├── logs/
│ │ │ ├── makeblastdb_err.log
│ │ │ ├── makeblastdb_out.log
│ │ │ ├── tblastn_err.log
│ │ │ └── tblastn_out.log
│ │ └── run_eukaryota_odb10/
│ │ ├── augustus_output/
│ │ ├── blast_output/
│ │ ├── busco_sequences/
│ │ └── hmmer_output/
│ └── logs/
│ └── busco.log
├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/versions.yml
*/
}
workflow test_busco_protein {
@ -228,28 +85,8 @@ workflow test_busco_protein {
[] // No config
)
/* Output tree:
/tmp/tmpzwd5dn56/busco/
├── short_summary.specific.bacteria_odb10.proteome.fasta.json -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.json
├── short_summary.specific.bacteria_odb10.proteome.fasta.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.txt
├── test-bacteria_odb10-busco -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco/
│ ├── logs/
│ │ └── busco.log
│ └── proteome.fasta/
│ ├── logs/
│ │ ├── hmmsearch_err.log
│ │ └── hmmsearch_out.log
│ └── run_bacteria_odb10/
│ ├── busco_sequences/
│ ├── full_table.tsv
│ ├── hmmer_output/
│ ├── missing_busco_list.tsv
│ ├── short_summary.json
│ └── short_summary.txt
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/versions.yml
*/
}
workflow test_busco_transcriptome {
input = [
@ -264,63 +101,4 @@ workflow test_busco_transcriptome {
[] // No config
)
/* Output tree:
/tmp/tmpitjyvo9g/busco/
├── short_summary.specific.bacteria_odb10.test1.contigs.fa.json -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.json
├── short_summary.specific.bacteria_odb10.test1.contigs.fa.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt
├── test-bacteria_odb10-busco -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco/
│ ├── logs/
│ │ └── busco.log
│ └── test1.contigs.fa/
│ ├── blast_db/
│ │ ├── test1.contigs.fa.ndb
│ │ ├── test1.contigs.fa.nhr
│ │ ├── test1.contigs.fa.nin
│ │ ├── test1.contigs.fa.not
│ │ ├── test1.contigs.fa.nsq
│ │ ├── test1.contigs.fa.ntf
│ │ └── test1.contigs.fa.nto
│ ├── logs/
│ │ ├── hmmsearch_err.log
│ │ ├── hmmsearch_out.log
│ │ ├── makeblastdb_err.log
│ │ ├── makeblastdb_out.log
│ │ ├── tblastn_err.log
│ │ └── tblastn_out.log
│ ├── run_bacteria_odb10/
│ │ ├── blast_output/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ ├── short_summary.txt
│ │ └── single_copy_proteins.faa
│ └── translated_proteins/
│ ├── 1024388at2.faa
│ ├── 1054741at2.faa
│ ├── 1093223at2.faa
│ ├── 1151822at2.faa
│ ├── 143460at2.faa
│ ├── 1491686at2.faa
│ ├── 1504821at2.faa
│ ├── 1574817at2.faa
│ ├── 1592033at2.faa
│ ├── 1623045at2.faa
│ ├── 1661836at2.faa
│ ├── 1674344at2.faa
│ ├── 1698718at2.faa
│ ├── 1990650at2.faa
│ ├── 223233at2.faa
│ ├── 402899at2.faa
│ ├── 505485at2.faa
│ ├── 665824at2.faa
│ ├── 776861at2.faa
│ ├── 874197at2.faa
│ ├── 932854at2.faa
│ └── 95696at2.faa
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/versions.yml
*/
}

View file

@ -16,19 +16,6 @@
- "Complete BUSCOs"
- "Missing BUSCOs"
- "Dependencies and versions"
- path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json
contains:
- "one_line_summary"
- "mode"
- "dataset"
- path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.txt
contains:
- "BUSCO version"
- "The lineage dataset is"
- "BUSCO was run in mode"
- "Complete BUSCOs"
- "Missing BUSCOs"
- "Dependencies and versions"
- path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa
- path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
@ -93,70 +80,6 @@
- "DEBUG:busco.run_BUSCO"
- "Results from dataset"
- "how to cite BUSCO"
- path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt
md5sum: 5ad8ecfbf4058b059e5edeb9170fc683
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/bbtools_err.log
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/bbtools_out.log
contains:
- "IUPAC"
- "genome scaffold"
- "Coverage"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/hmmsearch_err.log
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/hmmsearch_out.log
contains:
- "# hmmsearch :: search profile(s) against a sequence database"
- "# target sequence database:"
- "Internal pipeline statistics summary:"
- "[ok]"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/prodigal_err.log
md5sum: 538510cfc7483498210f01e53fe035ad
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/prodigal_out.log
md5sum: 61050b0706addc9498b2088a2d6efa9a
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/.checkpoint
contains:
- "Tool: prodigal"
- "Completed"
- "jobs"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
md5sum: 836e9a80d33d8b89168f07ddc13ee991
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
md5sum: 20eeb75f86842e6e136f02bca8b73a9f
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
md5sum: 836e9a80d33d8b89168f07ddc13ee991
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
md5sum: 20eeb75f86842e6e136f02bca8b73a9f
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
md5sum: 538510cfc7483498210f01e53fe035ad
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
md5sum: 61050b0706addc9498b2088a2d6efa9a
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/.bbtools_output/.checkpoint
contains:
- "Tool: bbtools"
- "Completed"
- "jobs"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/full_table.tsv
md5sum: 2360bb06f7171dafaa7ebf879789689b
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/hmmer_output.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/missing_busco_list.tsv
md5sum: e1fb12cf416448d35fbc5c508065963a
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/short_summary.json
contains:
- "one_line_summary"
- "mode"
- "lineage_dataset"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/short_summary.txt
contains:
- "# BUSCO version is:"
- "Results:"
- "busco:"
- path: output/busco/test-bacteroidetes_odb10-busco/logs/busco.log
contains:
- "DEBUG:busco.run_BUSCO"
- "Results from dataset"
- "how to cite BUSCO"
- name: busco test_busco_genome_multi_fasta
command: nextflow run ./tests/modules/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config