mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2025-01-03 04:52:09 -05:00
New last/train module to train alignment parameters. (#492)
* New last/train module to train alignment parameters. The last-train command creates a parameter file that will be used by the last/lastal module for sequence alignment. It takes indexed sequences and query sequences as input, and we use the metadata of both to create an id for the parameter output file. Submission of the LAST modules is discussed in more detail in issue #464. For consistency, we use LAST version 1219 for this whole development and will upgrade later. * Corrected files according to the nf-core v1.14 standards. * Fixed functions.nf file for the last-train module. * Apply suggestions from code review Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Find index name. * Correct after the input channels were changed. * Use double underscore as a name separator. Single underscores can happen in ids, therefore, we would like to keep two underscores. * Remove extra spaces. * Fixed the passing of the "score matrix" line. * Apply suggestions from code review Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update software/last/train/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
parent
e84eaa22f3
commit
b592cea30b
7 changed files with 206 additions and 0 deletions
70
software/last/train/functions.nf
Normal file
70
software/last/train/functions.nf
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* -----------------------------------------------------
|
||||||
|
* Utility functions used in nf-core DSL2 module files
|
||||||
|
* -----------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Derive the software tool name from a process name (normally $task.process).
 *
 * The last ':'-separated component of the process name is taken, then the part
 * before its first '_' is returned in lower case,
 * e.g. 'WORKFLOW:LAST_TRAIN' -> 'last'.
 */
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]   // drop any enclosing workflow scopes
    def tool_name    = process_name.tokenize('_')[0]    // keep the part before the first underscore
    return tool_name.toLowerCase()
}
|
||||||
|
|
||||||
|
/*
 * Build the Groovy Map of module options, filling in a default for every
 * key the caller left unset (or set to a falsy value).
 *
 * publish_files is copied through untouched, so a missing key stays null.
 */
def initOptions(Map args) {
    return [
        args            : args.args            ?: '',
        args2           : args.args2           ?: '',
        args3           : args.args3           ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir     ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix          ?: ''
    ]
}
|
||||||
|
|
||||||
|
/*
 * Tidy up and join the elements of a list into a single '/'-separated path string.
 *
 * Each entry is trimmed of surrounding whitespace and of leading and trailing
 * slashes. Entries that are null, blank, or made only of slashes are dropped so
 * that they cannot produce empty path segments.
 */
def getPathFromList(path_list) {
    def paths = path_list
        // Groovy truth: null, '' and whitespace-only entries are all rejected here.
        // (The safe-navigation result must not be dereferenced again, or a null entry throws.)
        .findAll { item -> item?.trim() }
        // Strip whitespace, then leading and trailing slashes
        .collect { it.trim().replaceAll('^[/]+|[/]+$', '') }
        // An entry such as '/' is empty after stripping; drop it to avoid 'a//b'
        .findAll { it }
    return paths.join('/')
}
|
||||||
|
|
||||||
|
/*
 * Decide where (and whether) a module result file is published.
 *
 * Expects a Map with the keys filename, options, publish_dir, meta and
 * publish_by_meta. Returns the relative publish path for the file, or null
 * when the file is not to be published.
 */
def saveFiles(Map args) {
    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def ioptions  = initOptions(args.options)
    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

    // Optionally append one sub-directory per requested meta key
    if (ioptions.publish_by_meta) {
        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
        for (key in key_list) {
            if (!args.meta || !(key instanceof String)) {
                continue
            }
            // Fall back to the key name itself when meta has no such entry
            def segment = key
            if (args.meta.containsKey(key)) {
                def value = args.meta[key]
                // Booleans become '<key>_<value>' so the directory name stays meaningful
                segment = value instanceof Boolean ? "${key}_${value}".toString() : value
            }
            // Anything that is not a plain String contributes an empty segment
            path_list.add(segment instanceof String ? segment : '')
        }
    }

    def publish_files = ioptions.publish_files

    // No filter configured: publish every file
    if (publish_files == null) {
        return "${getPathFromList(path_list)}/$args.filename"
    }

    // Filter configured as [suffix: sub-directory]: first matching suffix wins
    if (publish_files instanceof Map) {
        for (ext in publish_files) {
            if (args.filename.endsWith(ext.key)) {
                def ext_list = path_list.collect()
                ext_list.add(ext.value)
                return "${getPathFromList(ext_list)}/$args.filename"
            }
        }
    }

    // Any other setting, or no matching suffix: do not publish
    return null
}
|
44
software/last/train/main.nf
Normal file
44
software/last/train/main.nf
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
// Import generic module functions
|
||||||
|
include { initOptions; saveFiles; getSoftwareName } from './functions'
|
||||||
|
|
||||||
|
params.options = [:]
|
||||||
|
options = initOptions(params.options)
|
||||||
|
|
||||||
|
/*
 * Run last-train on query sequences against a LAST index directory and
 * capture the trained parameters in a '.par' file.
 *
 * Inputs:  tuple (meta, fastx) with the query sequences, and the index directory.
 * Outputs: param_file (meta, *.par) and version (*.version.txt).
 */
process LAST_TRAIN {
    tag "$meta.id"
    label 'process_high'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::last=1219" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/last:1219--h2e03b76_0"
    } else {
        container "quay.io/biocontainers/last:1219--h2e03b76_0"
    }

    input:
    tuple val(meta), path(fastx)
    path index

    output:
    tuple val(meta), path("*.par"), emit: param_file
    path "*.version.txt"          , emit: version

    script:
    def software = getSoftwareName(task.process)
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // The index base name is recovered from the '.bck' file found inside the staged
    // index directory. The directory name comes from the 'index' input rather than
    // being hard-coded, so it stays consistent with the last-train call below.
    """
    INDEX_NAME=\$(find -L ${index}/ -name "*.bck" | sed 's/\\.bck\$//' | sed 's,^${index}/,,')

    last-train \\
        $options.args \\
        -P $task.cpus \\
        ${index}/\$INDEX_NAME \\
        $fastx \\
        > ${prefix}.\$INDEX_NAME.par

    lastdb --version | sed 's/lastdb //' > ${software}.version.txt
    """
}
|
48
software/last/train/meta.yml
Normal file
48
software/last/train/meta.yml
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
# Module description for last/train.
# Inputs and outputs are listed in the same order as in the process definition.
name: last_train
description: Find suitable score parameters for sequence alignment
keywords:
  - LAST
  - train
  - fastq
  - fasta
tools:
  - last:
      description: LAST finds & aligns related regions of sequences.
      homepage: https://gitlab.com/mcfrith/last
      documentation: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-train.rst
      tool_dev_url: https://gitlab.com/mcfrith/last
      doi: ""
      licence: ['GPL v3-or-later']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastx:
      type: file
      description: FASTA/FASTQ file
      pattern: "*.{fasta,fastq}"
  - index:
      type: directory
      description: Directory containing the files of the LAST index
      pattern: "lastdb/"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - param_file:
      type: file
      description: Trained parameter file
      pattern: "*.par"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"

authors:
  - "@aleksandrabliznina"
|
|
@ -374,6 +374,10 @@ last/lastdb:
|
||||||
- software/last/lastdb/**
|
- software/last/lastdb/**
|
||||||
- tests/software/last/lastdb/**
|
- tests/software/last/lastdb/**
|
||||||
|
|
||||||
|
last/train:
|
||||||
|
- software/last/train/**
|
||||||
|
- tests/software/last/train/**
|
||||||
|
|
||||||
mash/sketch:
|
mash/sketch:
|
||||||
- software/mash/sketch/**
|
- software/mash/sketch/**
|
||||||
- tests/software/mash/sketch/**
|
- tests/software/mash/sketch/**
|
||||||
|
|
|
@ -26,6 +26,8 @@ params {
|
||||||
|
|
||||||
all_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/all_sites.fas"
|
all_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/all_sites.fas"
|
||||||
informative_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/informative_sites.fas"
|
informative_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/informative_sites.fas"
|
||||||
|
|
||||||
|
lastdb_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/alignment/last/lastdb.tar.gz"
|
||||||
}
|
}
|
||||||
'illumina' {
|
'illumina' {
|
||||||
test_single_end_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.single_end.bam"
|
test_single_end_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.single_end.bam"
|
||||||
|
|
15
tests/software/last/train/main.nf
Normal file
15
tests/software/last/train/main.nf
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { UNTAR } from '../../../../software/untar/main.nf' addParams( options: [:] )
|
||||||
|
include { LAST_TRAIN } from '../../../../software/last/train/main.nf' addParams( options: [:] )
|
||||||
|
|
||||||
|
// Test workflow: unpack the archived LAST index, then train alignment
// parameters for the test contigs against it.
workflow test_last_train {

    // Archive of the LAST index, unpacked by UNTAR below
    lastdb_archive = [ file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) ]

    // Query sequences paired with their meta map
    query = [
        [ id:'contigs' ], // meta map
        file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
    ]

    UNTAR ( lastdb_archive )
    LAST_TRAIN ( query, UNTAR.out.untar )
}
|
23
tests/software/last/train/test.yml
Normal file
23
tests/software/last/train/test.yml
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
- name: last train test_last_train
|
||||||
|
command: nextflow run tests/software/last/train -entry test_last_train -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- last/train
|
||||||
|
- last
|
||||||
|
files:
|
||||||
|
- path: output/last/contigs.genome.par
|
||||||
|
contains:
|
||||||
|
- "score matrix"
|
||||||
|
- path: output/untar/lastdb/genome.bck
|
||||||
|
md5sum: 5519879b9b6c4d1fc508da7f17f88f2e
|
||||||
|
- path: output/untar/lastdb/genome.des
|
||||||
|
md5sum: 3a9ea6d336e113a74d7fdca5e7b623fc
|
||||||
|
- path: output/untar/lastdb/genome.prj
|
||||||
|
md5sum: 489715f14b0fea6273822696e72357f9
|
||||||
|
- path: output/untar/lastdb/genome.sds
|
||||||
|
md5sum: 2cd381f4f8a9c52cfcd323a2863eccb2
|
||||||
|
- path: output/untar/lastdb/genome.ssp
|
||||||
|
md5sum: 4137fb6fe9df2b3d78d5b960390aac7b
|
||||||
|
- path: output/untar/lastdb/genome.suf
|
||||||
|
md5sum: 1895efa8653e8e9bd3605cff0408ed33
|
||||||
|
- path: output/untar/lastdb/genome.tis
|
||||||
|
md5sum: b7c40f06b1309dc6f37849eeb86dfd22
|
Loading…
Reference in a new issue