diff --git a/modules/nextclade/datasetget/main.nf b/modules/nextclade/datasetget/main.nf
new file mode 100644
index 00000000..55371168
--- /dev/null
+++ b/modules/nextclade/datasetget/main.nf
@@ -0,0 +1,39 @@
+process NEXTCLADE_DATASETGET {
+    tag "$dataset"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
+        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }"
+
+    input:
+    val dataset
+    val reference
+    val tag
+
+    output:
+    path "$prefix"     , emit: dataset
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${dataset}"
+    def fasta = reference ? "--reference ${reference}" : ''
+    def version = tag ? "--tag ${tag}" : ''
+    """
+    nextclade \\
+        dataset \\
+        get \\
+        $args \\
+        --name $dataset \\
+        $fasta \\
+        $version \\
+        --output-dir $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        nextclade: \$(nextclade --version 2>&1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nextclade/datasetget/meta.yml b/modules/nextclade/datasetget/meta.yml
new file mode 100644
index 00000000..1246d918
--- /dev/null
+++ b/modules/nextclade/datasetget/meta.yml
@@ -0,0 +1,42 @@
+name: nextclade_datasetget
+description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
+keywords:
+  - nextclade
+  - variant
+  - consensus
+tools:
+  - nextclade:
+      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
+      homepage: https://github.com/nextstrain/nextclade
+      documentation: https://github.com/nextstrain/nextclade
+      tool_dev_url: https://github.com/nextstrain/nextclade
+      doi: ""
+      licence: ['MIT']
+
+input:
+  - dataset:
+      type: string
+      description: Name of dataset to retrieve. A list of available datasets can be obtained using the nextclade dataset list command.
+      pattern: ".+"
+  - reference:
+      type: string
+      description: Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the nextclade dataset list command.
+      pattern: ".+"
+  - tag:
+      type: string
+      description: Version tag of the dataset to download. A list of available datasets can be obtained using the nextclade dataset list command.
+      pattern: ".+"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - dataset:
+      type: path
+      description: A directory containing the dataset files needed for nextclade run
+      pattern: "*"
+
+authors:
+  - "@antunderwood"
+  - "@drpatelh"
diff --git a/modules/nextclade/main.nf b/modules/nextclade/main.nf
deleted file mode 100755
index f60af57b..00000000
--- a/modules/nextclade/main.nf
+++ /dev/null
@@ -1,40 +0,0 @@
-process NEXTCLADE {
-    tag "$meta.id"
-    label 'process_low'
-
-    conda (params.enable_conda ? "bioconda::nextclade_js=0.14.4" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/nextclade_js:0.14.4--h9ee0642_0' :
-        'quay.io/biocontainers/nextclade_js:0.14.4--h9ee0642_0' }"
-
-    input:
-    tuple val(meta), path(fasta)
-
-    output:
-    tuple val(meta), path("${prefix}.csv")       , emit: csv
-    tuple val(meta), path("${prefix}.json")      , emit: json
-    tuple val(meta), path("${prefix}.tree.json") , emit: json_tree
-    tuple val(meta), path("${prefix}.tsv")       , emit: tsv
-    tuple val(meta), path("${prefix}.clades.tsv"), optional:true, emit: tsv_clades
-    path "versions.yml"                          , emit: versions
-
-    script:
-    def args = task.ext.args ?: ''
-    prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    nextclade \\
-        $args \\
-        --jobs $task.cpus \\
-        --input-fasta $fasta \\
-        --output-json ${prefix}.json \\
-        --output-csv ${prefix}.csv \\
-        --output-tsv ${prefix}.tsv \\
-        --output-tsv-clades-only ${prefix}.clades.tsv \\
-        --output-tree ${prefix}.tree.json
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        nextclade: \$(nextclade --version 2>&1)
-    END_VERSIONS
-    """
-}
diff --git a/modules/nextclade/run/main.nf b/modules/nextclade/run/main.nf
new file mode 100644
index 00000000..e29dd8ce
--- /dev/null
+++ b/modules/nextclade/run/main.nf
@@ -0,0 +1,42 @@
+process NEXTCLADE_RUN {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
+        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    path dataset
+
+    output:
+    tuple val(meta), path("${prefix}.csv")      , emit: csv
+    tuple val(meta), path("${prefix}.tsv")      , emit: tsv
+    tuple val(meta), path("${prefix}.json")     , emit: json
+    tuple val(meta), path("${prefix}.tree.json"), emit: json_tree
+    path "versions.yml"                         , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    nextclade \\
+        run \\
+        $args \\
+        --jobs $task.cpus \\
+        --input-fasta $fasta \\
+        --input-dataset $dataset \\
+        --output-csv ${prefix}.csv \\
+        --output-tsv ${prefix}.tsv \\
+        --output-json ${prefix}.json \\
+        --output-tree ${prefix}.tree.json \\
+        --output-basename ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        nextclade: \$(nextclade --version 2>&1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nextclade/meta.yml b/modules/nextclade/run/meta.yml
old mode 100755
new mode 100644
similarity index 75%
rename from modules/nextclade/meta.yml
rename to modules/nextclade/run/meta.yml
index 1b4a435a..40a863e6
--- a/modules/nextclade/meta.yml
+++ b/modules/nextclade/run/meta.yml
@@ -1,17 +1,17 @@
-name: nextclade
-description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation)
+name: nextclade_run
+description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
 keywords:
   - nextclade
   - variant
   - consensus
 tools:
   - nextclade:
-      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation)
-      homepage: https://clades.nextstrain.org
-      documentation: None
+      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
+      homepage: https://github.com/nextstrain/nextclade
+      documentation: https://github.com/nextstrain/nextclade
       tool_dev_url: https://github.com/nextstrain/nextclade
       doi: ""
-      licence: ["MIT"]
+      licence: ['MIT']
 
 input:
   - meta:
@@ -19,6 +19,10 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
+  - dataset:
+      type: path
+      description: Path containing the dataset files obtained by running nextclade dataset get
+      pattern: "*"
   - fasta:
       type: file
       description: FASTA file containing one or more consensus sequences
@@ -50,10 +54,7 @@ output:
       type: file
       description: TSV file containing nextclade results
       pattern: "*.{tsv}"
-  - tsv_clades:
-      type: file
-      description: TSV file containing nextclade results for clades only
-      pattern: "*.{clades.tsv}"
 
 authors:
+  - "@antunderwood"
   - "@drpatelh"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 80b27492..fc062e58 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1017,9 +1017,13 @@ ncbigenomedownload:
   - modules/ncbigenomedownload/**
   - tests/modules/ncbigenomedownload/**
 
-nextclade:
-  - modules/nextclade/**
-  - tests/modules/nextclade/**
+nextclade/datasetget:
+  - modules/nextclade/datasetget/**
+  - tests/modules/nextclade/datasetget/**
+
+nextclade/run:
+  - modules/nextclade/run/**
+  - tests/modules/nextclade/run/**
 
 ngmaster:
   - modules/ngmaster/**
diff --git a/tests/modules/nextclade/datasetget/main.nf b/tests/modules/nextclade/datasetget/main.nf
new file mode 100644
index 00000000..0fc38a5b
--- /dev/null
+++ b/tests/modules/nextclade/datasetget/main.nf
@@ -0,0 +1,14 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
+
+workflow test_nextclade_datasetget {
+
+    dataset = 'sars-cov-2'
+    reference = 'MN908947'
+    tag = '2022-01-05T19:54:31Z'
+
+    NEXTCLADE_DATASETGET ( dataset, reference, tag )
+}
diff --git a/tests/modules/nextclade/nextflow.config b/tests/modules/nextclade/datasetget/nextflow.config
similarity index 95%
rename from tests/modules/nextclade/nextflow.config
rename to tests/modules/nextclade/datasetget/nextflow.config
index 8730f1c4..50f50a7a 100644
--- a/tests/modules/nextclade/nextflow.config
+++ b/tests/modules/nextclade/datasetget/nextflow.config
@@ -1,5 +1,5 @@
 process {
 
     publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
-
-}
+
+}
\ No newline at end of file
diff --git a/tests/modules/nextclade/datasetget/test.yml b/tests/modules/nextclade/datasetget/test.yml
new file mode 100644
index 00000000..7e798a11
--- /dev/null
+++ b/tests/modules/nextclade/datasetget/test.yml
@@ -0,0 +1,20 @@
+- name: nextclade datasetget
+  command: nextflow run ./tests/modules/nextclade/datasetget -entry test_nextclade_datasetget -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/datasetget/nextflow.config
+  tags:
+    - nextclade
+    - nextclade/datasetget
+  files:
+    - path: output/nextclade/sars-cov-2/genemap.gff
+      md5sum: 21ebebfd0f416eeb959e55ebffd808e8
+    - path: output/nextclade/sars-cov-2/primers.csv
+      md5sum: 5990c3483bf66ce607aeb90a44e7ef2e
+    - path: output/nextclade/sars-cov-2/qc.json
+      md5sum: 018fa0c0b0d2e824954e37e01495d549
+    - path: output/nextclade/sars-cov-2/reference.fasta
+      md5sum: c7ce05f28e4ec0322c96f24e064ef55c
+    - path: output/nextclade/sars-cov-2/sequences.fasta
+      md5sum: 41129d255b99e0e92bdf20e866b99a1b
+    - path: output/nextclade/sars-cov-2/tag.json
+      md5sum: 2f6d8e806d9064571ee4188ef1304c9c
+    - path: output/nextclade/sars-cov-2/tree.json
+      md5sum: f8fb33ed62b59142ac20998eb599df6c
\ No newline at end of file
diff --git a/tests/modules/nextclade/main.nf b/tests/modules/nextclade/main.nf
deleted file mode 100755
index 15750990..00000000
--- a/tests/modules/nextclade/main.nf
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { NEXTCLADE } from '../../../modules/nextclade/main.nf'
-
-workflow test_nextclade {
-    input = [
-        [ id:'test', single_end:false ], // meta map
-        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
-    ]
-
-    NEXTCLADE ( input )
-}
diff --git a/tests/modules/nextclade/run/main.nf b/tests/modules/nextclade/run/main.nf
new file mode 100644
index 00000000..35b52317
--- /dev/null
+++ b/tests/modules/nextclade/run/main.nf
@@ -0,0 +1,22 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
+include { NEXTCLADE_RUN } from '../../../../modules/nextclade/run/main.nf'
+
+workflow test_nextclade_run {
+
+    dataset = 'sars-cov-2'
+    reference = 'MN908947'
+    tag = '2022-01-05T19:54:31Z'
+
+    NEXTCLADE_DATASETGET ( dataset, reference, tag )
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+
+    NEXTCLADE_RUN ( input, NEXTCLADE_DATASETGET.out.dataset )
+}
diff --git a/tests/modules/nextclade/run/nextflow.config b/tests/modules/nextclade/run/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/nextclade/run/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/nextclade/run/test.yml b/tests/modules/nextclade/run/test.yml
new file mode 100644
index 00000000..37f7d8ae
--- /dev/null
+++ b/tests/modules/nextclade/run/test.yml
@@ -0,0 +1,13 @@
+- name: nextclade run
+  command: nextflow run ./tests/modules/nextclade/run -entry test_nextclade_run -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/run/nextflow.config
+  tags:
+    - nextclade
+    - nextclade/run
+  files:
+    - path: output/nextclade/test.json
+    - path: output/nextclade/test.csv
+      md5sum: 3b87a4da190ba2e1fdc8418dc3a7ffdb
+    - path: output/nextclade/test.tsv
+      md5sum: 449393288e8734a02def139c550a8d9b
+    - path: output/nextclade/test.tree.json
+      md5sum: 9c6e33cb7ff860bee6194847bd2c855c
diff --git a/tests/modules/nextclade/test.yml b/tests/modules/nextclade/test.yml
deleted file mode 100755
index 36218aad..00000000
--- a/tests/modules/nextclade/test.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-- name: nextclade test_nextclade
-  command: nextflow run ./tests/modules/nextclade -entry test_nextclade -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/nextflow.config
-  tags:
-    - nextclade
-  files:
-    - path: output/nextclade/test.json
-      md5sum: cab92830c5cb66076e7d6c054ea98362
-    - path: output/nextclade/test.csv
-      md5sum: 4f7096df9be51f99a0d62a38653b29cf
-    - path: output/nextclade/test.tsv
-      md5sum: fe07dc4ffcd81742ca9bef93f88e8836
-    - path: output/nextclade/test.tree.json
-      md5sum: 5c57dd724bc2b5cfde8f42a17ff2865a