Roll back MALT to an older version due to a major bug in the most recent version (#1796)

* Rollback version

* Start work on rollback to 0.4.1

* Further changes

* Redo dynamic flag creation

* Prettier.
This commit is contained in:
James A. Fellows Yates 2022-06-20 08:20:22 +02:00 committed by GitHub
parent 6e7c0e945b
commit be8d7b3293
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 73 additions and 79 deletions

View file

@ -2,16 +2,17 @@ process MALT_BUILD {
label 'process_high'
conda (params.enable_conda ? "bioconda::malt=0.53" : null)
conda (params.enable_conda ? "bioconda::malt=0.41" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
'quay.io/biocontainers/malt:0.41--1' }"
input:
path fastas
val seq_type
path gff
path map_db
path mapping_file
val mapping_type
val mapping_db
output:
path "malt_index/" , emit: index
@ -29,19 +30,36 @@ process MALT_BUILD {
} else {
avail_mem = task.memory.giga
}
def igff = gff ? "-igff ${gff}" : ""
def valid_db = ['eggnog', 'interpro2go', 'kegg', 'seed', 'taxonomy']
if ( !valid_db.contains(mapping_db) ) { error "Unrecognised mapping database value for MALT_BUILD. Options: eggnog, interpro2go, kegg, seed, taxonomy" }
switch ( "${mapping_type}" ) {
case "gi":
mapping_prefix = "-g"; break
case "ref":
if ( mapping_db == "taxonomy" ) {
mapping_prefix = '-a'
} else {
mapping_prefix = "-r"
};break
case "syn":
mapping_prefix = "-s"; break
default:
error '[MALT_BUILD] Mapping type not recognised. Options: gi, ref, syn'; break
}
type_flag = mapping_prefix + '2' + mapping_db + " " + mapping_file
"""
malt-build \\
-J-Xmx${avail_mem}g \\
-v \\
--input ${fastas.join(' ')} \\
-s $seq_type \\
$igff \\
-d 'malt_index/' \\
-t $task.cpus \\
$args \\
-mdb ${map_db}/*.db |&tee malt-build.log
$type_flag |&tee malt-build.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -22,20 +22,24 @@ tools:
input:
- fastas:
type: file
description: Directory of, or FASTA reference files for indexing
description: Directory of, or list of FASTA reference files for indexing
pattern: "*/|*.fasta"
- seq_type:
type: string
description: Type of input data
pattern: "DNA|Protein"
- gff:
- mapping_file:
type: file
description: Directory of, or GFF3 files of input FASTA files
pattern: "*/|*.gff|*.gff3"
- map_db:
type: file
description: MEGAN .db file from https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html
pattern:
description: An unzipped MEGAN mapping file in .abin format (pre-2020), downloadable from https://software-ab.informatik.uni-tuebingen.de/download/megan6/old.html
pattern: "*.abin"
- mapping_type:
type: value
description: What type of accession to use for database construction.
pattern: "gi|ref|syn"
- mapping_db:
type: value
description: Which database the mapping file is derived from.
pattern: "eggnog|interpro2go|kegg|seed|taxonomy"
output:
- versions:

View file

@ -2,10 +2,10 @@ process MALT_RUN {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::malt=0.53" : null)
conda (params.enable_conda ? "bioconda::malt=0.41" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
'quay.io/biocontainers/malt:0.41--1' }"
input:
tuple val(meta), path(fastqs)
@ -33,7 +33,6 @@ process MALT_RUN {
"""
malt-run \\
-J-Xmx${avail_mem}g \\
-t $task.cpus \\
-v \\
-o . \\

View file

@ -6,21 +6,12 @@ include { UNZIP } from '../../../../modules/unzip/main.nf'
include { MALT_BUILD } from '../../../../modules/malt/build/main.nf'
workflow test_malt_build {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
seq_type = "DNA"
gff = []
map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
seq_type = "DNA"
map_accession = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/nucl_acc2tax-Jul2019.abin.zip", checkIfExists: true) ]
mapping_type = 'ref'
mapping_db = 'taxonomy'
UNZIP ( map_db )
MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive.map{ it[1] } )
}
workflow test_malt_build_gff {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
seq_type = "DNA"
gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
UNZIP ( map_db )
MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive.map{ it[1] } )
UNZIP ( map_accession )
MALT_BUILD ( fastas, seq_type, UNZIP.out.unzipped_archive.map{ it[1] }, "ref", "taxonomy" )
}

View file

@ -1,13 +1,16 @@
- name: malt build
command: nextflow run ./tests/modules/malt/build_test -entry test_malt_build -c ./tests/config/nextflow.config -c ./tests/modules/malt/build/nextflow.config
- name: malt build test_malt_build
command: nextflow run ./tests/modules/malt/build -entry test_malt_build -c ./tests/config/nextflow.config -c ./tests/modules/malt/build/nextflow.config
tags:
- malt
- malt/build
files:
- path: output/malt/malt-build.log
contains:
- "Peak memory"
- path: output/malt/malt_index/index0.idx
md5sum: 1954f2c00b418d00112829b0a6adb8ce
- path: output/malt/malt_index/ref.db
md5sum: 772a09aeb162515485b037604399f2bd
md5sum: 1fb74eccd5400fb23454454da1bd4c0c
- path: output/malt/malt_index/ref.idx
md5sum: 7dea362b3fac8e00956a4952a3d4f474
- path: output/malt/malt_index/ref.inf
@ -15,34 +18,10 @@
- path: output/malt/malt_index/table0.db
- path: output/malt/malt_index/table0.idx
- path: output/malt/malt_index/taxonomy.idx
md5sum: bb335e7c378a5bd85761b6eeed16d984
md5sum: 13aa81314892b5537319fca2a63a2c31
- path: output/malt/malt_index/taxonomy.map
md5sum: f6b05bbab2149e388cb769098e14d433
md5sum: 1e972302ae6d705b8abb377cfafd380a
- path: output/malt/malt_index/taxonomy.tre
md5sum: bde26a1fff5c63d3046d3863607a1e70
- name: malt build gff
command: nextflow run ./tests/modules/malt/build_test -entry test_malt_build_gff -c ./tests/config/nextflow.config -c ./tests/modules/malt/build/nextflow.config
tags:
- malt
- malt/build
files:
- path: output/malt/malt_index/aadd.dbx
md5sum: 4e2ed57e713d5372bd09350f447cdf53
- path: output/malt/malt_index/aadd.idx
md5sum: 0994061bc8673ebd283fa6546c3dd12c
- path: output/malt/malt_index/index0.idx
md5sum: 1954f2c00b418d00112829b0a6adb8ce
- path: output/malt/malt_index/ref.db
md5sum: 772a09aeb162515485b037604399f2bd
- path: output/malt/malt_index/ref.idx
md5sum: 7dea362b3fac8e00956a4952a3d4f474
- path: output/malt/malt_index/ref.inf
md5sum: b146842067cf278ef1d23e6c2e7c0c35
- path: output/malt/malt_index/table0.db
- path: output/malt/malt_index/table0.idx
- path: output/malt/malt_index/taxonomy.idx
md5sum: bb335e7c378a5bd85761b6eeed16d984
- path: output/malt/malt_index/taxonomy.map
md5sum: f6b05bbab2149e388cb769098e14d433
- path: output/malt/malt_index/taxonomy.tre
md5sum: bde26a1fff5c63d3046d3863607a1e70
md5sum: 79c2322475a8eebc57607ff36c1e4728
- path: output/unzip/nucl_acc2tax-Jul2019.abin/nucl_acc2tax-Jul2019.abin
md5sum: b2deb1df50ebcec4de71a6beec6226d7

View file

@ -8,18 +8,19 @@ include { MALT_RUN } from '../../../../modules/malt/run/main.nf'
workflow test_malt_run {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
seq_type = "DNA"
map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
seq_type = "DNA"
map_accession = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/nucl_acc2tax-Jul2019.abin.zip", checkIfExists: true) ]
mapping_type = 'ref'
mapping_db = 'taxonomy'
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
]
mode = "BlastN"
UNZIP ( map_db )
MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive.map { it[1] } )
UNZIP ( map_accession )
MALT_BUILD ( fastas, seq_type, UNZIP.out.unzipped_archive.map{ it[1] }, "ref", "taxonomy" )
MALT_RUN ( input, mode, MALT_BUILD.out.index )
}

View file

@ -1,8 +1,10 @@
- name: malt run
command: nextflow run ./tests/modules/malt/run -entry test_malt_run -c ./tests/config/nextflow.config -c ./tests/modules/malt/run/nextflow.config
- name: malt run test_malt_run
command: nextflow run ./tests/modules/malt/run -entry test_malt_run -c ./tests/config/nextflow.config -c ./tests/modules/malt/run/nextflow.config
tags:
- malt
- malt/run
- malt
files:
- path: output/malt/test_1.rma6
- path: output/malt/test-malt-run.log
contains:
- "Peak memory"
- path: output/malt/test_1.rma6