Add nextclade/datasetget and nextclade/run modules (#1210)

* Remove old nextclade module
* Add nextclade/datasetget and nextclade/run modules
* Fix ECLint
* Add reference and tag as inputs to datasetget module
* Fix tests
* Adjust spacing
2024-12-22 02:58:17 +00:00 · 2022-01-17 10:46:15 +00:00 · 2022-01-17 10:46:15 +00:00 · 796dbb573e
commit 796dbb573e
parent 6243c3792c
14 changed files with 217 additions and 82 deletions
--- a/modules/nextclade/datasetget/main.nf
+++ b/modules/nextclade/datasetget/main.nf
@ -0,0 +1,39 @@
+process NEXTCLADE_DATASETGET { // Runs `nextclade dataset get` to fetch the named dataset into the directory "$prefix"
+    tag "$dataset"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null) // conda spec is passed only when params.enable_conda is truthy, otherwise null
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
+        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }" // depot.galaxyproject.org image when the engine is singularity and task.ext.singularity_pull_docker_container is not set; quay.io image otherwise
+
+    input:
+    val dataset   // dataset name, passed to --name; also the default output directory name
+    val reference // when truthy, forwarded as `--reference <value>`; otherwise the flag is omitted
+    val tag       // when truthy, forwarded as `--tag <value>`; otherwise the flag is omitted
+
+    output:
+    path "$prefix"     , emit: dataset   // the directory given to --output-dir
+    path "versions.yml", emit: versions  // written by the heredoc at the end of the script
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line arguments from task.ext.args; empty string when unset
+    prefix = task.ext.prefix ?: "${dataset}" // output directory name; falls back to the dataset name. Also referenced by the output block above
+    def fasta = reference ? "--reference ${reference}" : '' // optional --reference flag
+    def version = tag ? "--tag ${tag}" : '' // optional --tag flag
+    """
+    nextclade \\
+        dataset \\
+        get \\
+        $args \\
+        --name $dataset \\
+        $fasta \\
+        $version \\
+        --output-dir $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        nextclade: \$(nextclade --version 2>&1)
+    END_VERSIONS
+    """
+}
--- a/modules/nextclade/datasetget/meta.yml
+++ b/modules/nextclade/datasetget/meta.yml
@ -0,0 +1,42 @@
+name: nextclade_datasetget
+description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
+keywords:
+  - nextclade
+  - variant
+  - consensus
+tools:
+  - nextclade:
+      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
+      homepage: https://github.com/nextstrain/nextclade
+      documentation: https://github.com/nextstrain/nextclade
+      tool_dev_url: https://github.com/nextstrain/nextclade
+      doi: ""
+      licence: ['MIT']
+
+input:
+  - dataset:
+      type: string
+      description: Name of dataset to retrieve. A list of available datasets can be obtained using the nextclade dataset list command.
+      pattern: ".+"
+  - reference:
+      type: string
+      description: Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the nextclade dataset list command.
+      pattern: ".+"
+  - tag:
+      type: string
+      description: Version tag of the dataset to download. A list of available datasets can be obtained using the nextclade dataset list command.
+      pattern: ".+"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - dataset:  # key matches the `emit: dataset` channel name of the process
+      type: path
+      description: A directory containing the dataset files needed for nextclade run
+      pattern: "*"  # directory is named by task.ext.prefix, defaulting to the dataset name, so no fixed name applies
+
+authors:
+  - "@antunderwood"
+  - "@drpatelh"
--- a/modules/nextclade/main.nf
+++ b/modules/nextclade/main.nf
@ -1,40 +0,0 @@
-process NEXTCLADE {
-    tag "$meta.id"
-    label 'process_low'
-
-    conda (params.enable_conda ? "bioconda::nextclade_js=0.14.4" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/nextclade_js:0.14.4--h9ee0642_0' :
-        'quay.io/biocontainers/nextclade_js:0.14.4--h9ee0642_0' }"
-
-    input:
-    tuple val(meta), path(fasta)
-
-    output:
-    tuple val(meta), path("${prefix}.csv")       , emit: csv
-    tuple val(meta), path("${prefix}.json")      , emit: json
-    tuple val(meta), path("${prefix}.tree.json") , emit: json_tree
-    tuple val(meta), path("${prefix}.tsv")       , emit: tsv
-    tuple val(meta), path("${prefix}.clades.tsv"), optional:true, emit: tsv_clades
-    path "versions.yml"                          , emit: versions
-
-    script:
-    def args = task.ext.args   ?: ''
-    prefix   = task.ext.prefix ?: "${meta.id}"
-    """
-    nextclade \\
-        $args \\
-        --jobs $task.cpus \\
-        --input-fasta $fasta \\
-        --output-json ${prefix}.json \\
-        --output-csv ${prefix}.csv \\
-        --output-tsv ${prefix}.tsv \\
-        --output-tsv-clades-only ${prefix}.clades.tsv \\
-        --output-tree ${prefix}.tree.json
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        nextclade: \$(nextclade --version 2>&1)
-    END_VERSIONS
-    """
-}
--- a/modules/nextclade/run/main.nf
+++ b/modules/nextclade/run/main.nf
@ -0,0 +1,42 @@
+process NEXTCLADE_RUN { // Runs `nextclade run` on a FASTA file against a dataset path and emits CSV, TSV, JSON and tree JSON results
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null) // conda spec is passed only when params.enable_conda is truthy, otherwise null
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
+        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }" // depot.galaxyproject.org image when the engine is singularity and task.ext.singularity_pull_docker_container is not set; quay.io image otherwise
+
+    input:
+    tuple val(meta), path(fasta) // meta map (its `id` feeds the tag and the default prefix) and the FASTA passed to --input-fasta
+    path dataset                 // dataset path passed to --input-dataset
+
+    output:
+    tuple val(meta), path("${prefix}.csv")      , emit: csv
+    tuple val(meta), path("${prefix}.tsv")      , emit: tsv
+    tuple val(meta), path("${prefix}.json")     , emit: json
+    tuple val(meta), path("${prefix}.tree.json"), emit: json_tree
+    path "versions.yml"                         , emit: versions // written by the heredoc at the end of the script
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line arguments from task.ext.args; empty string when unset
+    prefix = task.ext.prefix ?: "${meta.id}" // stem of every output file name; falls back to meta.id. Also referenced by the output block above
+    """
+    nextclade \\
+        run \\
+        $args \\
+        --jobs $task.cpus \\
+        --input-fasta $fasta \\
+        --input-dataset $dataset \\
+        --output-csv ${prefix}.csv \\
+        --output-tsv ${prefix}.tsv \\
+        --output-json ${prefix}.json \\
+        --output-tree ${prefix}.tree.json \\
+        --output-basename ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        nextclade: \$(nextclade --version 2>&1)
+    END_VERSIONS
+    """
+}
--- a/modules/nextclade/run/meta.yml
+++ b/modules/nextclade/run/meta.yml
@ -1,17 +1,17 @@
-name: nextclade
-description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation)
+name: nextclade_run
+description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
 keywords:
  - nextclade
  - variant
  - consensus
 tools:
  - nextclade:
-      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation)
-      homepage: https://clades.nextstrain.org
-      documentation: None
+      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
+      homepage: https://github.com/nextstrain/nextclade
+      documentation: https://github.com/nextstrain/nextclade
      tool_dev_url: https://github.com/nextstrain/nextclade
      doi: ""
-      licence: ["MIT"]
+      licence: ['MIT']

 input:
  - meta:
@ -19,6 +19,10 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
+  - dataset:
+      type: path
+      description: Path containing the dataset files obtained by running nextclade dataset get
+      pattern: "*"
  - fasta:
      type: file
      description: FASTA file containing one or more consensus sequences
@ -50,10 +54,7 @@ output:
      type: file
      description: TSV file containing nextclade results
      pattern: "*.{tsv}"
-  - tsv_clades:
-      type: file
-      description: TSV file containing nextclade results for clades only
-      pattern: "*.{clades.tsv}"

 authors:
+  - "@antunderwood"
  - "@drpatelh"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -1017,9 +1017,13 @@ ncbigenomedownload:
  - modules/ncbigenomedownload/**
  - tests/modules/ncbigenomedownload/**

-nextclade:
-  - modules/nextclade/**
-  - tests/modules/nextclade/**
+nextclade/datasetget:
+  - modules/nextclade/datasetget/**
+  - tests/modules/nextclade/datasetget/**
+
+nextclade/run:
+  - modules/nextclade/run/**
+  - tests/modules/nextclade/run/**

 ngmaster:
  - modules/ngmaster/**
--- a/tests/modules/nextclade/datasetget/main.nf
+++ b/tests/modules/nextclade/datasetget/main.nf
@ -0,0 +1,14 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
+
+workflow test_nextclade_datasetget { // Test entry point: calls NEXTCLADE_DATASETGET once with fixed dataset, reference and tag values
+
+    dataset = 'sars-cov-2'
+    reference = 'MN908947'
+    tag = '2022-01-05T19:54:31Z' // NOTE(review): presumably pinned so the md5sums in test.yml stay stable; confirm
+
+    NEXTCLADE_DATASETGET ( dataset, reference, tag ) // argument order follows the process input block: dataset, reference, tag
+}
--- a/tests/modules/nextclade/datasetget/nextflow.config
+++ b/tests/modules/nextclade/datasetget/nextflow.config
@ -1,5 +1,5 @@
 process {

    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
-
-}
+    
+}
--- a/tests/modules/nextclade/datasetget/test.yml
+++ b/tests/modules/nextclade/datasetget/test.yml
@ -0,0 +1,20 @@
+- name: nextclade datasetget  # runs the test_nextclade_datasetget entry workflow and checksums each listed dataset file
+  command: nextflow run ./tests/modules/nextclade/datasetget -entry test_nextclade_datasetget -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/datasetget/nextflow.config
+  tags:
+    - nextclade
+    - nextclade/datasetget
+  files:  # paths sit under output/nextclade/sars-cov-2, i.e. the directory named after the dataset
+    - path: output/nextclade/sars-cov-2/genemap.gff
+      md5sum: 21ebebfd0f416eeb959e55ebffd808e8
+    - path: output/nextclade/sars-cov-2/primers.csv
+      md5sum: 5990c3483bf66ce607aeb90a44e7ef2e
+    - path: output/nextclade/sars-cov-2/qc.json
+      md5sum: 018fa0c0b0d2e824954e37e01495d549
+    - path: output/nextclade/sars-cov-2/reference.fasta
+      md5sum: c7ce05f28e4ec0322c96f24e064ef55c
+    - path: output/nextclade/sars-cov-2/sequences.fasta
+      md5sum: 41129d255b99e0e92bdf20e866b99a1b
+    - path: output/nextclade/sars-cov-2/tag.json
+      md5sum: 2f6d8e806d9064571ee4188ef1304c9c
+    - path: output/nextclade/sars-cov-2/tree.json
+      md5sum: f8fb33ed62b59142ac20998eb599df6c
--- a/tests/modules/nextclade/main.nf
+++ b/tests/modules/nextclade/main.nf
@ -1,14 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { NEXTCLADE } from '../../../modules/nextclade/main.nf'
-
-workflow test_nextclade {
-    input = [ 
-        [ id:'test', single_end:false ], // meta map
-        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
-    ]
-    
-    NEXTCLADE ( input )
-}
--- a/tests/modules/nextclade/run/main.nf
+++ b/tests/modules/nextclade/run/main.nf
@ -0,0 +1,22 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
+include { NEXTCLADE_RUN        } from '../../../../modules/nextclade/run/main.nf'
+
+workflow test_nextclade_run { // Test entry point: fetches a dataset with NEXTCLADE_DATASETGET, then feeds it to NEXTCLADE_RUN
+
+    dataset = 'sars-cov-2'
+    reference = 'MN908947'
+    tag = '2022-01-05T19:54:31Z' // NOTE(review): presumably pinned so the md5sums in test.yml stay stable; confirm
+
+    NEXTCLADE_DATASETGET ( dataset, reference, tag )
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) // genome FASTA looked up in params.test_data; checkIfExists is set so a missing file is reported
+    ]
+
+    NEXTCLADE_RUN ( input, NEXTCLADE_DATASETGET.out.dataset ) // second argument is the `dataset` channel emitted by the call above
+}
--- a/tests/modules/nextclade/run/nextflow.config
+++ b/tests/modules/nextclade/run/nextflow.config
@ -0,0 +1,5 @@
+process {
+    // Publish to <outdir>/<tool>: take the last ':'-separated part of the process name, then its first '_'-separated token, lower-cased (e.g. NEXTCLADE_RUN -> nextclade)
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
--- a/tests/modules/nextclade/run/test.yml
+++ b/tests/modules/nextclade/run/test.yml
@ -0,0 +1,13 @@
+- name: nextclade run  # runs the test_nextclade_run entry workflow and checks the four result files
+  command: nextflow run ./tests/modules/nextclade/run -entry test_nextclade_run -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/run/nextflow.config
+  tags:
+    - nextclade
+    - nextclade/run
+  files:
+    - path: output/nextclade/test.json  # no md5sum listed for this file; NOTE(review): presumably its content is not byte-stable between runs, confirm
+    - path: output/nextclade/test.csv
+      md5sum: 3b87a4da190ba2e1fdc8418dc3a7ffdb
+    - path: output/nextclade/test.tsv
+      md5sum: 449393288e8734a02def139c550a8d9b
+    - path: output/nextclade/test.tree.json
+      md5sum: 9c6e33cb7ff860bee6194847bd2c855c
--- a/tests/modules/nextclade/test.yml
+++ b/tests/modules/nextclade/test.yml
@ -1,13 +0,0 @@
-- name: nextclade test_nextclade
-  command: nextflow run ./tests/modules/nextclade -entry test_nextclade -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/nextflow.config
-  tags:
-    - nextclade
-  files:
-    - path: output/nextclade/test.json
-      md5sum: cab92830c5cb66076e7d6c054ea98362
-    - path: output/nextclade/test.csv
-      md5sum: 4f7096df9be51f99a0d62a38653b29cf
-    - path: output/nextclade/test.tsv
-      md5sum: fe07dc4ffcd81742ca9bef93f88e8836
-    - path: output/nextclade/test.tree.json
-      md5sum: 5c57dd724bc2b5cfde8f42a17ff2865a