mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-14 05:43:08 +00:00
New module last/mafswap to reorder sequences in alignments (#500)
* New module last/mafswap to reorder sequences in alignments The `maf-swap` tool distributed with [LAST](https://gitlab.com/mcfrith/last) reorders sequences in alignment files in Multiple Alignment Format. When run without command-line arguments, it will swap the target and the query sequences. This is useful when turning a many-to-many alignment into a many-to-one and then a one-to-one alignment in conjunction with the `last-split` command (split, swap, split and swap again). The LAST aligner outputs MAF files, but other tools also use this format. As MAF files can be very large (up to hundreds of gigabytes), the module expects its input to be compressed with gzip and will compress its output. This new module is part of the work described in Issue #464. During this development, we fix the version of LAST to 1219 to ensure consistency (hence ignore lint's version warning). * Update MD5 sum. Actually, 7029066c27ac6f5ef18d660d5741979a is the MD5 sum of an empty file compressed with `gzip --no-name`… This happened because I forgot to update the config file after correcting the module… sorry ! * Apply suggestions from code review Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Change name as suggested in pull request. Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
parent
b592cea30b
commit
e75f88c68a
7 changed files with 172 additions and 0 deletions
70
software/last/mafswap/functions.nf
Normal file
70
software/last/mafswap/functions.nf
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* -----------------------------------------------------
|
||||||
|
* Utility functions used in nf-core DSL2 module files
|
||||||
|
* -----------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extract name of software tool from process name using $task.process
|
||||||
|
*/
|
||||||
|
def getSoftwareName(task_process) {
    // Keep only the last ':'-separated segment of the process name
    def process_name = task_process.tokenize(':')[-1]
    // The tool name is whatever precedes the first underscore, e.g. 'LAST_MAFSWAP' -> 'LAST'
    def tool_name = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
|
||||||
|
*/
|
||||||
|
def initOptions(Map args) {
    // Every key is always present in the result; absent or falsy inputs fall back to the defaults below.
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        // Passed through untouched (may be null): saveFiles treats null differently from a Map
        publish_files   : args.publish_files,
        suffix          : args.suffix ?: ''
    ]
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tidy up and join elements of a list to return a path string
|
||||||
|
*/
|
||||||
|
def getPathFromList(path_list) {
    // Drop null, empty and whitespace-only entries. Groovy truth treats both null and '' as false,
    // so no method is invoked on a null item (the previous `item?.trim().isEmpty()` threw on null
    // because the safe-navigation operator only guards the first call in the chain).
    def paths = path_list.findAll { item -> item?.trim() }
    // Strip surrounding whitespace plus any leading and trailing slashes from each remaining entry
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    // Join the cleaned segments into a single relative path string
    return paths.join('/')
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Function to save/publish module results
|
||||||
|
*/
|
||||||
|
def saveFiles(Map args) {
    // Version files never receive a publish path
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def ioptions  = initOptions(args.options)
    // Base directory: the module option wins over the caller-supplied default
    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

    // Optionally append one path segment per requested meta key
    if (ioptions.publish_by_meta) {
        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
        for (key in key_list) {
            if (!(args.meta && key instanceof String)) {
                continue
            }
            // Keys that are missing from meta are used verbatim as the segment name
            def path = key
            if (args.meta.containsKey(key)) {
                def meta_value = args.meta[key]
                // Booleans are rendered as '<key>_<value>'
                path = meta_value instanceof Boolean ? "${key}_${meta_value}".toString() : meta_value
            }
            // Anything that is not a String contributes an empty segment
            path_list.add(path instanceof String ? path : '')
        }
    }

    if (ioptions.publish_files instanceof Map) {
        // Publish only files whose name ends with a listed suffix, under the sub-directory mapped to that suffix
        for (ext in ioptions.publish_files) {
            if (args.filename.endsWith(ext.key)) {
                def ext_list = path_list + [ ext.value ]
                return "${getPathFromList(ext_list)}/$args.filename"
            }
        }
        return null
    }
    if (ioptions.publish_files == null) {
        // No filter configured: every remaining file gets a path
        return "${getPathFromList(path_list)}/$args.filename"
    }
    // publish_files is neither a Map nor null: no path is produced
    return null
}
|
37
software/last/mafswap/main.nf
Normal file
37
software/last/mafswap/main.nf
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
// Import generic module functions
|
||||||
|
include { initOptions; saveFiles; getSoftwareName } from './functions'
|
||||||
|
|
||||||
|
// Module options; an empty map is the default when the including script passes none
params.options = [:]
|
||||||
|
// Fill in a default for every option key via initOptions so the process can read them unconditionally
options = initOptions(params.options)
|
||||||
|
|
||||||
|
// Run LAST's maf-swap on a gzip-compressed MAF file and emit the result gzip-compressed
process LAST_MAFSWAP {
|
||||||
|
// Label each task with the sample identifier taken from the meta map
tag "$meta.id"
|
||||||
|
label 'process_low'
|
||||||
|
// Publish under params.outdir; saveFiles computes the sub-path of each file from the module options
publishDir "${params.outdir}",
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
|
||||||
|
|
||||||
|
// LAST is pinned to version 1219 in the conda spec and in both container images below
conda (params.enable_conda ? "bioconda::last=1219" : null)
|
||||||
|
// Singularity image is used only when running under Singularity without the pull-docker-container flag
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
|
||||||
|
container "https://depot.galaxyproject.org/singularity/last:1219--h2e03b76_0"
|
||||||
|
} else {
|
||||||
|
container "quay.io/biocontainers/last:1219--h2e03b76_0"
|
||||||
|
}
|
||||||
|
|
||||||
|
// meta: sample information map; maf: alignment file, read through zcat in the script below
input:
|
||||||
|
tuple val(meta), path(maf)
|
||||||
|
|
||||||
|
// maf: gzip-compressed output alignment paired with its meta map; version: file holding the version string
output:
|
||||||
|
tuple val(meta), path("*.maf.gz"), emit: maf
|
||||||
|
path "*.version.txt" , emit: version
|
||||||
|
|
||||||
|
script:
|
||||||
|
// Tool name derived from the process name; used to name the version file
def software = getSoftwareName(task.process)
|
||||||
|
// Output file prefix: meta.id, with options.suffix appended when one is set
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
||||||
|
// Pipeline: decompress -> maf-swap (with any extra options.args) -> recompress.
// gzip --no-name leaves the original file name and timestamp out of the gzip header.
"""
|
||||||
|
zcat $maf | maf-swap $options.args | gzip --no-name > ${prefix}.swapped.maf.gz
|
||||||
|
|
||||||
|
# maf-swap has no --version option but lastdb, part of the same package, has.
|
||||||
|
echo \$(lastdb --version 2>&1) | sed 's/lastdb //' > ${software}.version.txt
|
||||||
|
"""
|
||||||
|
}
|
39
software/last/mafswap/meta.yml
Normal file
39
software/last/mafswap/meta.yml
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
name: last_mafswap
|
||||||
|
description: Reorder sequences within the alignments of a MAF file
|
||||||
|
keywords:
|
||||||
|
- LAST
|
||||||
|
- reorder
|
||||||
|
- alignment
|
||||||
|
- MAF
|
||||||
|
tools:
|
||||||
|
- last:
|
||||||
|
description: LAST finds & aligns related regions of sequences.
|
||||||
|
homepage: https://gitlab.com/mcfrith/last
|
||||||
|
documentation: https://gitlab.com/mcfrith/last/-/blob/main/doc/
|
||||||
|
tool_dev_url: https://gitlab.com/mcfrith/last
|
||||||
|
doi: ""
|
||||||
|
licence: ['GPL-3.0-or-later']
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- maf:
|
||||||
|
type: file
|
||||||
|
description: Multiple Alignment Format (MAF) file, compressed with gzip
|
||||||
|
pattern: "*.{maf.gz}"
|
||||||
|
|
||||||
|
output:
|
||||||
|
- maf:
|
||||||
|
type: file
|
||||||
|
description: Multiple Alignment Format (MAF) file, compressed with gzip
|
||||||
|
pattern: "*.{maf.gz}"
|
||||||
|
- version:
|
||||||
|
type: file
|
||||||
|
description: File containing software version
|
||||||
|
pattern: "*.{version.txt}"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@charles-plessy"
|
|
@ -374,6 +374,10 @@ last/lastdb:
|
||||||
- software/last/lastdb/**
|
- software/last/lastdb/**
|
||||||
- tests/software/last/lastdb/**
|
- tests/software/last/lastdb/**
|
||||||
|
|
||||||
|
last/mafswap:
|
||||||
|
- software/last/mafswap/**
|
||||||
|
- tests/software/last/mafswap/**
|
||||||
|
|
||||||
last/train:
|
last/train:
|
||||||
- software/last/train/**
|
- software/last/train/**
|
||||||
- tests/software/last/train/**
|
- tests/software/last/train/**
|
||||||
|
|
|
@ -27,6 +27,7 @@ params {
|
||||||
all_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/all_sites.fas"
|
all_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/all_sites.fas"
|
||||||
informative_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/informative_sites.fas"
|
informative_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/informative_sites.fas"
|
||||||
|
|
||||||
|
contigs_genome_maf_gz = "${test_data_dir}/genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz"
|
||||||
lastdb_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/alignment/last/lastdb.tar.gz"
|
lastdb_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/alignment/last/lastdb.tar.gz"
|
||||||
}
|
}
|
||||||
'illumina' {
|
'illumina' {
|
||||||
|
|
13
tests/software/last/mafswap/main.nf
Normal file
13
tests/software/last/mafswap/main.nf
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { LAST_MAFSWAP } from '../../../../software/last/mafswap/main.nf' addParams( options: [:] )
|
||||||
|
|
||||||
|
// Test workflow: runs LAST_MAFSWAP once on the sarscov2 contigs_genome_maf_gz test data file
workflow test_last_mafswap {
|
||||||
|
|
||||||
|
// Input tuple in the shape the process expects: [ meta map, MAF file ]
input = [ [ id:'contigs.genome' ], // meta map
|
||||||
|
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) ]
|
||||||
|
|
||||||
|
LAST_MAFSWAP ( input )
|
||||||
|
}
|
8
tests/software/last/mafswap/test.yml
Normal file
8
tests/software/last/mafswap/test.yml
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
- name: last mafswap test_last_mafswap
|
||||||
|
command: nextflow run tests/software/last/mafswap -entry test_last_mafswap -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- last
|
||||||
|
- last/mafswap
|
||||||
|
files:
|
||||||
|
- path: output/last/contigs.genome.swapped.maf.gz
|
||||||
|
md5sum: b98c5ff297878a19f1ab4f1a5e354678
|
Loading…
Reference in a new issue