Add nextclade/datasetget and nextclade/run modules (#1210)

* Remove old nextclade module

* Add nextclade/datasetget and nextclade/run modules

* Fix ECLint

* Add reference and tag as inputs to datasetget module

* Fix tests

* Adjust spacing
This commit is contained in:
Harshil Patel 2022-01-17 10:46:15 +00:00 committed by GitHub
parent 6243c3792c
commit 796dbb573e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 217 additions and 82 deletions

View file

@ -0,0 +1,39 @@
// Download a Nextclade dataset into a local directory via `nextclade dataset get`.
//
// Inputs:
//   dataset   - dataset name, passed to `--name`; also the default output directory name
//   reference - optional; when set it is passed as `--reference`
//   tag       - optional; when set it is passed as `--tag`
// Outputs:
//   dataset   - the directory given to `--output-dir`
//   versions  - versions.yml holding the output of `nextclade --version`
process NEXTCLADE_DATASETGET {
    tag "$dataset"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }"

    input:
    val dataset
    val reference
    val tag

    output:
    path "$prefix"     , emit: dataset
    path "versions.yml", emit: versions

    script:
    def args = task.ext.args ?: ''
    // Assigned without `def` because the output declaration above refers to it.
    prefix = task.ext.prefix ?: "${dataset}"
    // Optional flags collapse to an empty string when the corresponding input is empty/null.
    def fasta = reference ? "--reference ${reference}" : ''
    def version = tag ? "--tag ${tag}" : ''
    """
    nextclade \\
        dataset \\
        get \\
        $args \\
        --name $dataset \\
        $fasta \\
        $version \\
        --output-dir $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        nextclade: \$(nextclade --version 2>&1)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,42 @@
# Metadata describing the nextclade/datasetget module: the wrapped tool and
# the module's input values and output channels.
name: nextclade_datasetget
description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
keywords:
  - nextclade
  - variant
  - consensus
tools:
  - nextclade:
      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
      homepage: https://github.com/nextstrain/nextclade
      documentation: https://github.com/nextstrain/nextclade
      tool_dev_url: https://github.com/nextstrain/nextclade
      doi: ""
      licence: ['MIT']

input:
  - dataset:
      type: string
      description: Name of dataset to retrieve. A list of available datasets can be obtained using the nextclade dataset list command.
      pattern: ".+"
  - reference:
      type: string
      description: Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the nextclade dataset list command.
      pattern: ".+"
  - tag:
      type: string
      description: Version tag of the dataset to download. A list of available datasets can be obtained using the nextclade dataset list command.
      pattern: ".+"

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Keyed by the channel name the process emits (`emit: dataset`). The
  # directory name comes from the prefix, so no fixed name is matched.
  - dataset:
      type: path
      description: A directory containing the dataset files needed for nextclade run
      pattern: "*"

authors:
  - "@antunderwood"
  - "@drpatelh"

View file

@ -1,40 +0,0 @@
// Run nextclade (nextclade_js 0.14.4 package) on a FASTA file.
//
// Input:
//   tuple of a meta map (its `id` is the default output prefix) and a FASTA file
// Outputs (each paired with the meta map, except versions):
//   csv, json, json_tree, tsv - result files named after the prefix
//   tsv_clades                - optional clades-only TSV
//   versions                  - versions.yml holding the output of `nextclade --version`
process NEXTCLADE {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::nextclade_js=0.14.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/nextclade_js:0.14.4--h9ee0642_0' :
        'quay.io/biocontainers/nextclade_js:0.14.4--h9ee0642_0' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("${prefix}.csv")       , emit: csv
    tuple val(meta), path("${prefix}.json")      , emit: json
    tuple val(meta), path("${prefix}.tree.json") , emit: json_tree
    tuple val(meta), path("${prefix}.tsv")       , emit: tsv
    tuple val(meta), path("${prefix}.clades.tsv"), optional:true, emit: tsv_clades
    path "versions.yml"                          , emit: versions

    script:
    def args = task.ext.args ?: ''
    // Assigned without `def` because the output declarations above refer to it.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    nextclade \\
        $args \\
        --jobs $task.cpus \\
        --input-fasta $fasta \\
        --output-json ${prefix}.json \\
        --output-csv ${prefix}.csv \\
        --output-tsv ${prefix}.tsv \\
        --output-tsv-clades-only ${prefix}.clades.tsv \\
        --output-tree ${prefix}.tree.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        nextclade: \$(nextclade --version 2>&1)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,42 @@
// Run `nextclade run` on a FASTA file against a previously downloaded dataset.
//
// Inputs:
//   tuple of a meta map (its `id` is the default output prefix) and a FASTA file
//   dataset - path passed to `--input-dataset`
// Outputs (each paired with the meta map, except versions):
//   csv, tsv, json, json_tree - result files named after the prefix
//   versions                  - versions.yml holding the output of `nextclade --version`
process NEXTCLADE_RUN {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(fasta)
    path dataset

    output:
    tuple val(meta), path("${prefix}.csv")      , emit: csv
    tuple val(meta), path("${prefix}.tsv")      , emit: tsv
    tuple val(meta), path("${prefix}.json")     , emit: json
    tuple val(meta), path("${prefix}.tree.json"), emit: json_tree
    path "versions.yml"                         , emit: versions

    script:
    def args = task.ext.args ?: ''
    // Assigned without `def` because the output declarations above refer to it.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    nextclade \\
        run \\
        $args \\
        --jobs $task.cpus \\
        --input-fasta $fasta \\
        --input-dataset $dataset \\
        --output-csv ${prefix}.csv \\
        --output-tsv ${prefix}.tsv \\
        --output-json ${prefix}.json \\
        --output-tree ${prefix}.tree.json \\
        --output-basename ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        nextclade: \$(nextclade --version 2>&1)
    END_VERSIONS
    """
}

View file

@ -1,17 +1,17 @@
name: nextclade name: nextclade_run
description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation) description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
keywords: keywords:
- nextclade - nextclade
- variant - variant
- consensus - consensus
tools: tools:
- nextclade: - nextclade:
description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation) description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
homepage: https://clades.nextstrain.org homepage: https://github.com/nextstrain/nextclade
documentation: None documentation: https://github.com/nextstrain/nextclade
tool_dev_url: https://github.com/nextstrain/nextclade tool_dev_url: https://github.com/nextstrain/nextclade
doi: "" doi: ""
licence: ["MIT"] licence: ['MIT']
input: input:
- meta: - meta:
@ -19,6 +19,10 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- dataset:
type: path
description: Path containing the dataset files obtained by running nextclade dataset get
pattern: "*"
- fasta: - fasta:
type: file type: file
description: FASTA file containing one or more consensus sequences description: FASTA file containing one or more consensus sequences
@ -50,10 +54,7 @@ output:
type: file type: file
description: TSV file containing nextclade results description: TSV file containing nextclade results
pattern: "*.{tsv}" pattern: "*.{tsv}"
- tsv_clades:
type: file
description: TSV file containing nextclade results for clades only
pattern: "*.{clades.tsv}"
authors: authors:
- "@antunderwood"
- "@drpatelh" - "@drpatelh"

View file

@ -1017,9 +1017,13 @@ ncbigenomedownload:
- modules/ncbigenomedownload/** - modules/ncbigenomedownload/**
- tests/modules/ncbigenomedownload/** - tests/modules/ncbigenomedownload/**
nextclade: nextclade/datasetget:
- modules/nextclade/** - modules/nextclade/datasetget/**
- tests/modules/nextclade/** - tests/modules/nextclade/datasetget/**
nextclade/run:
- modules/nextclade/run/**
- tests/modules/nextclade/run/**
ngmaster: ngmaster:
- modules/ngmaster/** - modules/ngmaster/**

View file

@ -0,0 +1,14 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
// Test entry point: calls NEXTCLADE_DATASETGET with a fixed dataset name,
// reference accession and version tag.
workflow test_nextclade_datasetget {

    dataset_name = 'sars-cov-2'
    reference_id = 'MN908947'
    dataset_tag  = '2022-01-05T19:54:31Z'

    NEXTCLADE_DATASETGET ( dataset_name, reference_id, dataset_tag )
}

View file

@ -1,5 +1,5 @@
process { process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
} }

View file

@ -0,0 +1,20 @@
# Test case for the datasetget module: runs the test_nextclade_datasetget
# entry workflow and checks each published dataset file by md5sum.
- name: nextclade datasetget
  command: >-
    nextflow run ./tests/modules/nextclade/datasetget
    -entry test_nextclade_datasetget
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nextclade/datasetget/nextflow.config
  tags:
    - nextclade
    - nextclade/datasetget
  files:
    - path: output/nextclade/sars-cov-2/genemap.gff
      md5sum: '21ebebfd0f416eeb959e55ebffd808e8'
    - path: output/nextclade/sars-cov-2/primers.csv
      md5sum: '5990c3483bf66ce607aeb90a44e7ef2e'
    - path: output/nextclade/sars-cov-2/qc.json
      md5sum: '018fa0c0b0d2e824954e37e01495d549'
    - path: output/nextclade/sars-cov-2/reference.fasta
      md5sum: 'c7ce05f28e4ec0322c96f24e064ef55c'
    - path: output/nextclade/sars-cov-2/sequences.fasta
      md5sum: '41129d255b99e0e92bdf20e866b99a1b'
    - path: output/nextclade/sars-cov-2/tag.json
      md5sum: '2f6d8e806d9064571ee4188ef1304c9c'
    - path: output/nextclade/sars-cov-2/tree.json
      md5sum: 'f8fb33ed62b59142ac20998eb599df6c'

View file

@ -1,14 +0,0 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { NEXTCLADE } from '../../../modules/nextclade/main.nf'
// Test entry point: runs NEXTCLADE on the SARS-CoV-2 genome FASTA from the
// test-data parameters.
workflow test_nextclade {

    sample_meta  = [ id:'test', single_end:false ]
    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // The module takes a single [ meta map, FASTA ] tuple.
    NEXTCLADE ( [ sample_meta, genome_fasta ] )
}

View file

@ -0,0 +1,22 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
include { NEXTCLADE_RUN } from '../../../../modules/nextclade/run/main.nf'
// Test entry point: downloads a fixed dataset with NEXTCLADE_DATASETGET and
// feeds its `dataset` output, together with the SARS-CoV-2 genome FASTA,
// into NEXTCLADE_RUN.
workflow test_nextclade_run {

    sample_meta  = [ id:'test', single_end:false ]
    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Arguments are: dataset name, reference accession, dataset version tag.
    NEXTCLADE_DATASETGET ( 'sars-cov-2', 'MN908947', '2022-01-05T19:54:31Z' )

    // First argument is the [ meta map, FASTA ] tuple; second is the dataset directory.
    NEXTCLADE_RUN ( [ sample_meta, genome_fasta ], NEXTCLADE_DATASETGET.out.dataset )
}

View file

@ -0,0 +1,5 @@
// Settings applied to the processes run by this test.
process {
// Publish outputs under <params.outdir>/<name>, where <name> is derived from
// the process name: take the last ':'-separated component, keep the part
// before its first '_', and lower-case it (e.g. NEXTCLADE_RUN -> nextclade).
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,13 @@
# Test case for the run module: runs the test_nextclade_run entry workflow
# and checks the published result files. test.json is listed without a
# checksum; the other files are compared by md5sum.
- name: nextclade run
  command: >-
    nextflow run ./tests/modules/nextclade/run
    -entry test_nextclade_run
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nextclade/run/nextflow.config
  tags:
    - nextclade
    - nextclade/run
  files:
    - path: output/nextclade/test.json
    - path: output/nextclade/test.csv
      md5sum: '3b87a4da190ba2e1fdc8418dc3a7ffdb'
    - path: output/nextclade/test.tsv
      md5sum: '449393288e8734a02def139c550a8d9b'
    - path: output/nextclade/test.tree.json
      md5sum: '9c6e33cb7ff860bee6194847bd2c855c'

View file

@ -1,13 +0,0 @@
# Test case for the original single nextclade module: runs the test_nextclade
# entry workflow and checks each published result file by md5sum.
- name: nextclade test_nextclade
  command: >-
    nextflow run ./tests/modules/nextclade
    -entry test_nextclade
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nextclade/nextflow.config
  tags:
    - nextclade
  files:
    - path: output/nextclade/test.json
      md5sum: 'cab92830c5cb66076e7d6c054ea98362'
    - path: output/nextclade/test.csv
      md5sum: '4f7096df9be51f99a0d62a38653b29cf'
    - path: output/nextclade/test.tsv
      md5sum: 'fe07dc4ffcd81742ca9bef93f88e8836'
    - path: output/nextclade/test.tree.json
      md5sum: '5c57dd724bc2b5cfde8f42a17ff2865a'