Support batch fasta analysis

This commit is contained in:
Mahesh Binzer-Panchal 2022-05-03 13:26:49 +00:00
parent 012f085378
commit 57db28386e
2 changed files with 21 additions and 9 deletions

View file

@ -8,7 +8,7 @@ process BUSCO {
'quay.io/biocontainers/busco:5.3.2--pyhdfd78af_0' }"
input:
tuple val(meta), path(fasta) // Required: meta map, and fasta sequence file
tuple val(meta), path(fasta, stageAs: 'tmp_input/*') // Required: meta map, and fasta sequence files
each lineage // Required: lineage to check against
path busco_lineages_path // Recommended: path to busco lineages - downloads if not set
path config_file // Optional: busco configuration file
@ -50,11 +50,21 @@ process BUSCO {
fi
# Ensure the input is uncompressed
gzip -cdf $fasta > ${prefix}_uncompressed.fasta
# Collect every staged input under one directory so it can be handed to
# busco as a single --in argument further down in this script.
INPUT_SEQS=input_seqs
mkdir "\$INPUT_SEQS"
cd "\$INPUT_SEQS"
for FASTA in ../tmp_input/*; do
    if [ "\${FASTA##*.}" = 'gz' ]; then
        # Decompress into this directory, dropping the .gz suffix.
        # The redirect target is quoted so that a file name containing
        # whitespace is not word-split (bash: "ambiguous redirect").
        gzip -cdf "\$FASTA" > "\$( basename "\$FASTA" .gz )"
    else
        # Not gzip-named: link it in place instead of copying.
        # The link target stays relative (../tmp_input/...), which resolves
        # correctly because the link itself lives one level below.
        ln -s "\$FASTA" .
    fi
done
cd ..
busco \\
--cpu $task.cpus \\
--in ${prefix}_uncompressed.fasta \\
--in "\$INPUT_SEQS" \\
--out ${prefix}-busco \\
--lineage_dataset $lineage \\
$busco_lineage_dir \\
@ -62,7 +72,7 @@ process BUSCO {
$args
# clean up
rm ${prefix}_uncompressed.fasta
rm -rf "\$INPUT_SEQS"
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -9,7 +9,10 @@ workflow test_busco {
input = [
[ id:'test', single_end:false ], // meta map
file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
[
file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true),
file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)
]
]
BUSCO (
@ -20,4 +23,3 @@ workflow test_busco {
)
}