2021-05-25 17:10:48 -04:00
|
|
|
// LAST_LASTAL: align query sequences to a LAST index with `lastal` and emit
// the alignments as a gzip-compressed MAF file.
//
// Inputs
//   tuple val(meta), path(fastx), path(param_file)
//       meta       - map; `meta.id` is used for the task tag and as the
//                    default output prefix.
//       fastx      - query sequence file passed to `lastal`.
//       param_file - optional alignment-parameter file (e.g. as computed by
//                    the `last/train` module); passed as `-p <file>` when
//                    it evaluates to true, omitted otherwise.
//   path index     - directory holding the LAST index; the index name is
//                    taken from the `.des` file found inside it.
//
// Outputs
//   maf      - tuple (meta, "*.maf.gz")
//   versions - "versions.yml" recording the `lastal` version
//
// Why a 3-tuple: query sequences and their alignment parameters used to be
// provided in different channels, which sometimes desynchronised them.
// Taking them in a single tuple keeps them together. To build the tuple in
// a pipeline, `join` the query channel with the parameter channel:
//
//     LAST_TRAIN  ( query, target )
//     LAST_LASTAL ( query.join(LAST_TRAIN.out.param_file), target )
//
// When no parameter file is computed, pass an empty list in its place:
//
//     LAST_LASTAL ( query.map { row -> [ row[0], row[1], [] ] }, target )
process LAST_LASTAL {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? 'bioconda::last=1250' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/last:1250--h2e03b76_0' :
        'quay.io/biocontainers/last:1250--h2e03b76_0' }"

    input:
    tuple val(meta), path(fastx), path (param_file)
    path index

    output:
    tuple val(meta), path("*.maf.gz"), emit: maf
    path "versions.yml"              , emit: versions

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Only add `-p` when a parameter file was actually supplied.
    def trained_params = param_file ? "-p ${param_file}" : ''
    """
    INDEX_NAME=\$(basename \$(ls $index/*.des) .des)

    # Without pipefail the pipeline's exit status is gzip's, so a failing
    # lastal would be masked and a truncated/empty MAF reported as success.
    set -o pipefail
    lastal \\
        $trained_params \\
        $args \\
        -P $task.cpus \\
        ${index}/\$INDEX_NAME \\
        $fastx \\
        | gzip --no-name > ${prefix}.\$INDEX_NAME.maf.gz
    # gzip needs --no-name otherwise it puts a timestamp in the file,
    # which makes its checksum non-reproducible.

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        last: \$(lastal --version 2>&1 | sed 's/lastal //')
    END_VERSIONS
    """
}
|