Add blast/tblastn module (#2092)

* Cherry-pick the module files from the sanger-tol branch and switch the test data to the official nf-core test datasets

* Update modules/blast/tblastn/main.nf, align the emit

Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com>

Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com>
This commit is contained in:
Guoying Qi 2022-09-26 16:43:41 +01:00 committed by GitHub
parent d1c8bddaaa
commit 8e21005d53
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 134 additions and 0 deletions

View file

@ -0,0 +1,37 @@
// Runs `tblastn` with the query sequences in `fasta` against the BLAST
// nucleotide database staged in `db`.
//
// Inputs:
//   meta  - Groovy map with sample information; `meta.id` is used for the
//           task tag and as the default output prefix
//   fasta - query sequences passed to `tblastn -query`
//   db    - staged BLAST database; the database name is discovered from its
//           `*.nsq` file
// Outputs:
//   txt      - tuple of meta and `<prefix>.tblastn.txt`
//   versions - `versions.yml` recording the tblastn version
process BLAST_TBLASTN {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? 'bioconda::blast=2.12.0' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/blast:2.12.0--pl5262h3289130_0' :
        'quay.io/biocontainers/blast:2.12.0--pl5262h3289130_0' }"

    input:
    tuple val(meta), path(fasta)
    path db

    output:
    tuple val(meta), path('*.tblastn.txt'), emit: txt
    path "versions.yml"                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The database name handed to `-db` is the path of the `.nsq` file without
    // its extension. The sed expression escapes the dot and anchors at the end
    // of the line so that only the trailing `.nsq` is removed; an unanchored
    // `.nsq` would instead delete the first "<any char>nsq" found anywhere in
    // the path.
    // NOTE(review): if `find` returns more than one `.nsq` file, DB holds
    // several names and `-db \$DB` receives them as separate words - confirm
    // that callers always stage a single database.
    """
    DB=`find -L ./ -name "*.nsq" | sed 's/\\.nsq\$//'`
    tblastn \\
        -num_threads $task.cpus \\
        -db \$DB \\
        -query $fasta \\
        $args \\
        -out ${prefix}.tblastn.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blast: \$(tblastn -version 2>&1 | sed 's/^.*tblastn: //; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,41 @@
# Module metadata for BLAST_TBLASTN: documents the tool, the input channels
# and the output channels declared by the process.
name: blast_tblastn
description: Queries a BLAST nucleotide (DNA) database with protein sequences using tblastn
keywords:
  - fasta
  - blast
  - tblastn
  - DNA sequence
tools:
  - blast:
      description: |
        Protein to Translated Nucleotide BLAST.
      homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi
      documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs
      doi: 10.1016/S0022-2836(05)80360-2
      licence: ["US-Government-Work"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: Input fasta file containing protein query sequences
      pattern: "*.{fa,fasta}"
  - db:
      type: directory
      description: Directory containing blast database
      pattern: "*"
output:
  # The txt channel is emitted as a tuple of meta and the result file.
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - txt:
      type: file
      description: File containing tblastn hits
      pattern: "*.tblastn.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@yumisims"
  - "@gq2"

View file

@ -379,6 +379,10 @@ blast/makeblastdb:
- modules/blast/makeblastdb/**
- tests/modules/blast/makeblastdb/**
# blast/tblastn: glob patterns covering the module sources and its tests
# (presumably used to select which tests run when these paths change - confirm).
blast/tblastn:
- modules/blast/tblastn/**
- tests/modules/blast/tblastn/**
bowtie/align:
- modules/bowtie/align/**
- modules/bowtie/build/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BLAST_TBLASTN } from '../../../../modules/blast/tblastn/main.nf'
include { BLAST_MAKEBLASTDB } from '../../../../modules/blast/makeblastdb/main.nf'
// Test workflow: builds a BLAST database from the sarscov2 genome with
// BLAST_MAKEBLASTDB, then runs BLAST_TBLASTN with the sarscov2 proteome as
// the query against that database.
workflow test_blast_tblastn {

    // Genome FASTA from which the database is built
    genome_fasta   = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]

    // Proteome FASTA used as the query
    proteome_fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]

    // [ meta, query ] tuple expected by BLAST_TBLASTN's first input
    query = [ [id:'test'], proteome_fasta ]

    BLAST_MAKEBLASTDB ( genome_fasta )
    BLAST_TBLASTN ( query, BLAST_MAKEBLASTDB.out.db )
}

View file

@ -0,0 +1,12 @@
// Per-process settings for the blast/tblastn module test.
//
// Both selectors publish into "${params.outdir}/<tool>", where <tool> is
// derived from the process name: take the last ':'-separated component,
// keep the part before the first '_', and lower-case it.
process {

    withName: 'BLAST_MAKEBLASTDB' {
        ext.args   = '-dbtype nucl'
        publishDir = { "${params.outdir}/${task.process.tokenize(':').last().tokenize('_').first().toLowerCase()}" }
    }

    withName: 'BLAST_TBLASTN' {
        ext.args   = '-outfmt 6 -task tblastn -evalue 0.001 -qcov_hsp_perc 60 -max_target_seqs 1'
        publishDir = { "${params.outdir}/${task.process.tokenize(':').last().tokenize('_').first().toLowerCase()}" }
    }

}

View file

@ -0,0 +1,25 @@
# Test specification for the blast/tblastn module: runs the test workflow and
# checks the listed output files (and their md5sums where given).
- name: blast tblastn
  command: nextflow run ./tests/modules/blast/tblastn -entry test_blast_tblastn -c ./tests/config/nextflow.config -c ./tests/modules/blast/tblastn/nextflow.config
  tags:
    - blast
    - blast/tblastn
  files:
    # tblastn result and version record
    - path: ./output/blast/test.tblastn.txt
      md5sum: 'c26ccfa5ceae82fc6affdc77f5505b62'
    - path: ./output/blast/versions.yml
      md5sum: '6d23563b8d75e2fe1e03d3fbe10098a9'
    # Database files under blast_db
    - path: ./output/blast/blast_db/genome.fasta.nsq
      md5sum: '982cbc7d9e38743b9b1037588862b9da'
    # No md5sum is listed for the .nin file, so only its path is given
    # (presumably its content is not stable between runs - confirm).
    - path: ./output/blast/blast_db/genome.fasta.nin
    - path: ./output/blast/blast_db/genome.fasta
      md5sum: '6e9fe4042a72f2345f644f239272b7e6'
    - path: ./output/blast/blast_db/genome.fasta.nhr
      md5sum: 'f4b4ddb034fd3dd7b25c89e9d50c004e'
    - path: ./output/blast/blast_db/genome.fasta.ndb
      md5sum: '0d553c830656469211de113c5022f06d'
    - path: ./output/blast/blast_db/genome.fasta.not
      md5sum: '1e53e9d08f1d23af0299cfa87478a7bb'
    - path: ./output/blast/blast_db/genome.fasta.nto
      md5sum: '33cdeccccebe80329f1fdbee7f5874cb'
    - path: ./output/blast/blast_db/genome.fasta.ntf
      md5sum: 'de1250813f0c7affc6d12dac9d0fb6bb'