Add --auto-lineage option and remove single_end from the test meta maps

Co-authored-by: Jill V. Hagey, PhD <jvhagey@gmail.com>
This commit is contained in:
Mahesh Binzer-Panchal 2022-05-06 11:48:21 +00:00
parent faf45edc5e
commit 35c5828f43
3 changed files with 82 additions and 20 deletions

View file

@ -9,7 +9,7 @@ process BUSCO {
input: input:
tuple val(meta), path('tmp_input/*') tuple val(meta), path('tmp_input/*')
each lineage // Required: lineage to check against each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead
path busco_lineages_path // Recommended: path to busco lineages - downloads if not set path busco_lineages_path // Recommended: path to busco lineages - downloads if not set
path config_file // Optional: busco configuration file path config_file // Optional: busco configuration file
@ -27,6 +27,7 @@ process BUSCO {
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def prefix = task.ext.prefix ?: "${meta.id}-${lineage}"
def busco_config = config_file ? "--config $config_file" : '' def busco_config = config_file ? "--config $config_file" : ''
def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}"
def busco_lineage_dir = busco_lineages_path ? "--offline --download_path ${busco_lineages_path}" : '' def busco_lineage_dir = busco_lineages_path ? "--offline --download_path ${busco_lineages_path}" : ''
""" """
# Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
@ -63,7 +64,7 @@ process BUSCO {
--cpu $task.cpus \\ --cpu $task.cpus \\
--in "\$INPUT_SEQS" \\ --in "\$INPUT_SEQS" \\
--out ${prefix}-busco \\ --out ${prefix}-busco \\
--lineage_dataset $lineage \\ $busco_lineage \\
$busco_lineage_dir \\ $busco_lineage_dir \\
$busco_config \\ $busco_config \\
$args $args

View file

@ -4,28 +4,59 @@ nextflow.enable.dsl = 2
include { BUSCO } from '../../../modules/busco/main.nf' include { BUSCO } from '../../../modules/busco/main.nf'
// This tests genome decompression, empty input channels and data download
workflow test_busco_genome_single_fasta { workflow test_busco_genome_single_fasta {
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test' ], // meta map
file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
] ]
BUSCO ( BUSCO (
input, input,
['bacteria_odb10', 'bacteroidetes_odb10'], ['auto','bacteria_odb10', 'bacteroidetes_odb10'], // Launch with auto to use --auto-lineage, and specified lineages
[], // Download busco lineage [], // Download busco lineage
[], // No config [], // No config
) )
/* Output tree: /* Output tree:
/tmp/tmpisa3ktco/busco/ /tmp/tmp846crjv2/busco/
├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/short_summary.specific.bacteria_odb10.genome.fna.json ├── short_summary.generic.bacteria_odb10.genome.fna.json -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.json
├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/short_summary.specific.bacteria_odb10.genome.fna.txt ├── short_summary.generic.bacteria_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.generic.bacteria_odb10.genome.fna.txt
├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/short_summary.specific.bacteroidetes_odb10.genome.fna.json ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/short_summary.specific.bacteria_odb10.genome.fna.json
├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/short_summary.specific.bacteroidetes_odb10.genome.fna.txt ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/short_summary.specific.bacteria_odb10.genome.fna.txt
├── test-bacteria_odb10-busco -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/test-bacteria_odb10-busco/ ├── short_summary.specific.bacteroidales_odb10.genome.fna.json -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.specific.bacteroidales_odb10.genome.fna.json
├── short_summary.specific.bacteroidales_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/short_summary.specific.bacteroidales_odb10.genome.fna.txt
├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/short_summary.specific.bacteroidetes_odb10.genome.fna.json
├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/short_summary.specific.bacteroidetes_odb10.genome.fna.txt
├── test-auto-busco -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco/
│ ├── genome.fna/
│ │ ├── auto_lineage/
│ │ │ ├── run_archaea_odb10/
│ │ │ ├── run_bacteria_odb10/
│ │ │ └── run_eukaryota_odb10/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── metaeuk_err.log
│ │ │ ├── metaeuk_out.log
│ │ │ ├── prodigal_err.log
│ │ │ ├── prodigal_out.log
│ │ │ ├── sepp_err.log
│ │ │ └── sepp_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ ├── run_bacteria_odb10 -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco/genome.fna/auto_lineage/run_bacteria_odb10/ [recursive, not followed]
│ │ └── run_bacteroidales_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-auto-busco.batch_summary.txt -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/test-auto-busco.batch_summary.txt
├── test-bacteria_odb10-busco -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/test-bacteria_odb10-busco/
│ ├── genome.fna/ │ ├── genome.fna/
│ │ ├── logs/ │ │ ├── logs/
│ │ │ ├── hmmsearch_err.log │ │ │ ├── hmmsearch_err.log
@ -43,8 +74,8 @@ workflow test_busco_genome_single_fasta {
│ │ └── short_summary.txt │ │ └── short_summary.txt
│ └── logs/ │ └── logs/
│ └── busco.log │ └── busco.log
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpm91x0mn2/8a/ff5c15baba0942cca15a8d53e98009/test-bacteria_odb10-busco.batch_summary.txt ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpi6af66j1/45/107812e983a8e695c380ebc215e7d9/test-bacteria_odb10-busco.batch_summary.txt
├── test-bacteroidetes_odb10-busco -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/test-bacteroidetes_odb10-busco/ ├── test-bacteroidetes_odb10-busco -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/test-bacteroidetes_odb10-busco/
│ ├── genome.fna/ │ ├── genome.fna/
│ │ ├── logs/ │ │ ├── logs/
│ │ │ ├── hmmsearch_err.log │ │ │ ├── hmmsearch_err.log
@ -62,8 +93,8 @@ workflow test_busco_genome_single_fasta {
│ │ └── short_summary.txt │ │ └── short_summary.txt
│ └── logs/ │ └── logs/
│ └── busco.log │ └── busco.log
├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/test-bacteroidetes_odb10-busco.batch_summary.txt ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpi6af66j1/a2/eb4a34894f3ac5554759ad6c9f652b/test-bacteroidetes_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmpm91x0mn2/91/3abf602561d35fcd917711402977a3/versions.yml └── versions.yml -> /tmp/tmpi6af66j1/18/8be22ecd7a71471ff5082bd512972b/versions.yml
*/ */
} }
@ -71,7 +102,7 @@ workflow test_busco_genome_single_fasta {
workflow test_busco_genome_multi_fasta { workflow test_busco_genome_multi_fasta {
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test' ], // meta map
[ [
file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true), file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true),
file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true) file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)
@ -133,7 +164,7 @@ workflow test_busco_genome_multi_fasta {
workflow test_busco_eukaryote_metaeuk { workflow test_busco_eukaryote_metaeuk {
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test' ], // meta map
file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
] ]
@ -174,7 +205,7 @@ workflow test_busco_eukaryote_metaeuk {
workflow test_busco_eukaryote_augustus { workflow test_busco_eukaryote_augustus {
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test' ], // meta map
file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
] ]
@ -218,7 +249,7 @@ workflow test_busco_eukaryote_augustus {
workflow test_busco_protein { workflow test_busco_protein {
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test' ], // meta map
file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true) file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true)
] ]
@ -254,7 +285,7 @@ workflow test_busco_protein {
workflow test_busco_transcriptome { workflow test_busco_transcriptome {
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test' ], // meta map
file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true) file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)
] ]

View file

@ -3,6 +3,20 @@
tags: tags:
- busco - busco
files: files:
- path: output/busco/short_summary.generic.bacteria_odb10.genome.fna.json
contains:
- "one_line_summary"
- "input_file"
- "mode"
- "dataset"
- path: output/busco/short_summary.generic.bacteria_odb10.genome.fna.txt
contains:
- "BUSCO version"
- "The lineage dataset is"
- "BUSCO was run in mode"
- "Complete BUSCOs"
- "Missing BUSCOs"
- "Dependencies and versions"
- path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json
contains: contains:
- "one_line_summary" - "one_line_summary"
@ -17,6 +31,20 @@
- "Complete BUSCOs" - "Complete BUSCOs"
- "Missing BUSCOs" - "Missing BUSCOs"
- "Dependencies and versions" - "Dependencies and versions"
- path: output/busco/short_summary.specific.bacteroidales_odb10.genome.fna.json
contains:
- "one_line_summary"
- "input_file"
- "mode"
- "dataset"
- path: output/busco/short_summary.specific.bacteroidales_odb10.genome.fna.txt
contains:
- "BUSCO version"
- "The lineage dataset is"
- "BUSCO was run in mode"
- "Complete BUSCOs"
- "Missing BUSCOs"
- "Dependencies and versions"
- path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json
contains: contains:
- "one_line_summary" - "one_line_summary"
@ -31,6 +59,8 @@
- "Complete BUSCOs" - "Complete BUSCOs"
- "Missing BUSCOs" - "Missing BUSCOs"
- "Dependencies and versions" - "Dependencies and versions"
- path: output/busco/test-auto-busco.batch_summary.txt
md5sum: 32f7765c310f33555c31c7a3e64c990e
- path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
md5sum: e50690742e9ae6abdd2bf99334ff9e12 md5sum: e50690742e9ae6abdd2bf99334ff9e12
- path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt - path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt