Add nextclade/datasetget and nextclade/run modules (#1210)

* Remove old nextclade module
* Add nextclade/datasetget and nextclade/run modules
* Fix ECLint
* Add reference and tag as inputs to datasetget module
* Fix tests
* Adjust spacing
2024-12-31 03:32:10 -05:00 · 2022-01-17 10:46:15 +00:00 · 2022-01-17 10:46:15 +00:00 · 796dbb573e
commit 796dbb573e
parent 6243c3792c
14 changed files with 217 additions and 82 deletions
--- a/modules/nextclade/datasetget/main.nf
+++ b/modules/nextclade/datasetget/main.nf
@ -0,0 +1,39 @@
 // Fetches a named nextclade dataset into a local directory by running
 // `nextclade dataset get`, and records the nextclade version in versions.yml.
 process NEXTCLADE_DATASETGET {
    tag "$dataset"
    label 'process_low'
    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }"
    input:
    // Dataset name, passed to --name
    val dataset
    // Optional: forwarded as --reference only when truthy
    val reference
    // Optional: forwarded as --tag only when truthy
    val tag
    output:
    // The directory given to --output-dir, emitted under the name `dataset`
    path "$prefix"     , emit: dataset
    path "versions.yml", emit: versions
    script:
    def args = task.ext.args ?: ''
    // Output directory name: task.ext.prefix when set, otherwise the dataset name.
    // NOTE(review): assigned without `def`, presumably so that it stays visible
    // to the output declaration above — confirm.
    prefix = task.ext.prefix ?: "${dataset}"
    // Each optional flag collapses to an empty string when its input is falsy
    def fasta = reference ? "--reference ${reference}" : ''
    def version = tag ? "--tag ${tag}" : ''
    """
    nextclade \\
        dataset \\
        get \\
        $args \\
        --name $dataset \\
        $fasta \\
        $version \\
        --output-dir $prefix
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        nextclade: \$(nextclade --version 2>&1)
    END_VERSIONS
    """
 }
--- a/modules/nextclade/datasetget/meta.yml
+++ b/modules/nextclade/datasetget/meta.yml
@ -0,0 +1,42 @@
 # Metadata for the NEXTCLADE_DATASETGET module. Input and output entries are
 # named after the channels declared in the accompanying main.nf.
 name: nextclade_datasetget
 description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
 keywords:
  - nextclade
  - variant
  - consensus
 tools:
  - nextclade:
      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
      homepage: https://github.com/nextstrain/nextclade
      documentation: https://github.com/nextstrain/nextclade
      tool_dev_url: https://github.com/nextstrain/nextclade
      doi: ""
      licence: ['MIT']
 input:
  - dataset:
      type: string
      description: Name of dataset to retrieve. A list of available datasets can be obtained using the nextclade dataset list command.
      pattern: ".+"
  - reference:
      type: string
      description: Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the nextclade dataset list command.
      pattern: ".+"
  - tag:
      type: string
      description: Version tag of the dataset to download. A list of available datasets can be obtained using the nextclade dataset list command.
      pattern: ".+"
 output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Keyed by the process's `emit: dataset` name. The directory is named after
  # the task prefix (the dataset name by default), so no fixed name pattern applies.
  - dataset:
      type: path
      description: A directory containing the dataset files needed for nextclade run
      pattern: "*"
 authors:
  - "@antunderwood"
  - "@drpatelh"
--- a/modules/nextclade/main.nf
+++ b/modules/nextclade/main.nf
@ -1,40 +0,0 @@
 // Runs nextclade (nextclade_js 0.14.4 package) on a FASTA file and writes
 // JSON, CSV, TSV, clades-only TSV and tree outputs named after the task prefix.
 process NEXTCLADE {
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "bioconda::nextclade_js=0.14.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/nextclade_js:0.14.4--h9ee0642_0' :
        'quay.io/biocontainers/nextclade_js:0.14.4--h9ee0642_0' }"
    input:
    // meta: sample map (meta.id is used for the tag and the default prefix); fasta: passed to --input-fasta
    tuple val(meta), path(fasta)
    output:
    tuple val(meta), path("${prefix}.csv")       , emit: csv
    tuple val(meta), path("${prefix}.json")      , emit: json
    tuple val(meta), path("${prefix}.tree.json") , emit: json_tree
    tuple val(meta), path("${prefix}.tsv")       , emit: tsv
    // The clades-only TSV is the only output declared optional
    tuple val(meta), path("${prefix}.clades.tsv"), optional:true, emit: tsv_clades
    path "versions.yml"                          , emit: versions
    script:
    def args = task.ext.args   ?: ''
    // Output file stem: task.ext.prefix when set, otherwise the sample id.
    // NOTE(review): assigned without `def`, presumably so that it stays visible
    // to the output declarations above — confirm.
    prefix   = task.ext.prefix ?: "${meta.id}"
    """
    nextclade \\
        $args \\
        --jobs $task.cpus \\
        --input-fasta $fasta \\
        --output-json ${prefix}.json \\
        --output-csv ${prefix}.csv \\
        --output-tsv ${prefix}.tsv \\
        --output-tsv-clades-only ${prefix}.clades.tsv \\
        --output-tree ${prefix}.tree.json
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        nextclade: \$(nextclade --version 2>&1)
    END_VERSIONS
    """
 }
--- a/modules/nextclade/run/main.nf
+++ b/modules/nextclade/run/main.nf
@ -0,0 +1,42 @@
 // Runs `nextclade run` on a FASTA file against a previously fetched dataset
 // directory and writes CSV, TSV, JSON and tree outputs named after the task prefix.
 process NEXTCLADE_RUN {
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "bioconda::nextclade=1.9.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/nextclade:1.9.0--h9ee0642_0' :
        'quay.io/biocontainers/nextclade:1.9.0--h9ee0642_0' }"
    input:
    // meta: sample map (meta.id is used for the tag and the default prefix); fasta: passed to --input-fasta
    tuple val(meta), path(fasta)
    // Dataset path passed to --input-dataset
    path dataset
    output:
    tuple val(meta), path("${prefix}.csv")      , emit: csv
    tuple val(meta), path("${prefix}.tsv")      , emit: tsv
    tuple val(meta), path("${prefix}.json")     , emit: json
    tuple val(meta), path("${prefix}.tree.json"), emit: json_tree
    path "versions.yml"                         , emit: versions
    script:
    def args = task.ext.args ?: ''
    // Output file stem: task.ext.prefix when set, otherwise the sample id.
    // NOTE(review): assigned without `def`, presumably so that it stays visible
    // to the output declarations above — confirm.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    nextclade \\
        run \\
        $args \\
        --jobs $task.cpus \\
        --input-fasta $fasta \\
        --input-dataset $dataset \\
        --output-csv ${prefix}.csv \\
        --output-tsv ${prefix}.tsv \\
        --output-json ${prefix}.json \\
        --output-tree ${prefix}.tree.json \\
        --output-basename ${prefix}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        nextclade: \$(nextclade --version 2>&1)
    END_VERSIONS
    """
 }
--- a/modules/nextclade/run/meta.yml
+++ b/modules/nextclade/run/meta.yml
@ -1,17 +1,17 @@
-name: nextclade
+name: nextclade_run
-description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation)
+description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation)
 keywords:
  - nextclade
  - variant
  - consensus
 tools:
  - nextclade:
-      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (Javascript implementation)
+      description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks
-      homepage: https://clades.nextstrain.org
+      homepage: https://github.com/nextstrain/nextclade
-      documentation: None
+      documentation: https://github.com/nextstrain/nextclade
      tool_dev_url: https://github.com/nextstrain/nextclade
      doi: ""
-      licence: ["MIT"]
+      licence: ['MIT']
 input:
  - meta:
@ -19,6 +19,10 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - dataset:
      type: path
      description: Path containing the dataset files obtained by running nextclade dataset get
      pattern: "*"
  - fasta:
      type: file
      description: FASTA file containing one or more consensus sequences
@ -50,10 +54,7 @@ output:
      type: file
      description: TSV file containing nextclade results
      pattern: "*.{tsv}"
  - tsv_clades:
      type: file
      description: TSV file containing nextclade results for clades only
      pattern: "*.{clades.tsv}"
 authors:
  - "@antunderwood"
  - "@drpatelh"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -1017,9 +1017,13 @@ ncbigenomedownload:
  - modules/ncbigenomedownload/**
  - tests/modules/ncbigenomedownload/**
-nextclade:
+nextclade/datasetget:
-  - modules/nextclade/**
+  - modules/nextclade/datasetget/**
-  - tests/modules/nextclade/**
+  - tests/modules/nextclade/datasetget/**
 nextclade/run:
  - modules/nextclade/run/**
  - tests/modules/nextclade/run/**
 ngmaster:
  - modules/ngmaster/**
--- a/tests/modules/nextclade/datasetget/main.nf
+++ b/tests/modules/nextclade/datasetget/main.nf
@ -0,0 +1,14 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
 // Entry workflow for the datasetget module test: requests one pinned dataset.
 workflow test_nextclade_datasetget {
    NEXTCLADE_DATASETGET (
        'sars-cov-2',           // dataset name
        'MN908947',             // reference accession
        '2022-01-05T19:54:31Z'  // dataset version tag
    )
 }
--- a/tests/modules/nextclade/datasetget/nextflow.config
+++ b/tests/modules/nextclade/datasetget/nextflow.config
@ -1,5 +1,5 @@
 process {
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
-
+    
-}
+}
--- a/tests/modules/nextclade/datasetget/test.yml
+++ b/tests/modules/nextclade/datasetget/test.yml
@ -0,0 +1,20 @@
 # Test case for the datasetget module: runs the test_nextclade_datasetget entry
 # workflow and lists the dataset files expected under output/nextclade/sars-cov-2,
 # each with its expected md5 checksum.
 - name: nextclade datasetget
  command: nextflow run ./tests/modules/nextclade/datasetget -entry test_nextclade_datasetget -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/datasetget/nextflow.config
  tags:
    - nextclade
    - nextclade/datasetget
  files:
    - path: output/nextclade/sars-cov-2/genemap.gff
      md5sum: 21ebebfd0f416eeb959e55ebffd808e8
    - path: output/nextclade/sars-cov-2/primers.csv
      md5sum: 5990c3483bf66ce607aeb90a44e7ef2e
    - path: output/nextclade/sars-cov-2/qc.json
      md5sum: 018fa0c0b0d2e824954e37e01495d549
    - path: output/nextclade/sars-cov-2/reference.fasta
      md5sum: c7ce05f28e4ec0322c96f24e064ef55c
    - path: output/nextclade/sars-cov-2/sequences.fasta
      md5sum: 41129d255b99e0e92bdf20e866b99a1b
    - path: output/nextclade/sars-cov-2/tag.json
      md5sum: 2f6d8e806d9064571ee4188ef1304c9c
    - path: output/nextclade/sars-cov-2/tree.json
      md5sum: f8fb33ed62b59142ac20998eb599df6c
--- a/tests/modules/nextclade/main.nf
+++ b/tests/modules/nextclade/main.nf
@ -1,14 +0,0 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { NEXTCLADE } from '../../../modules/nextclade/main.nf'
 // Entry workflow for the single-step nextclade module test.
 workflow test_nextclade {
    // Sample metadata map and the SARS-CoV-2 genome FASTA from the shared test data
    meta  = [ id:'test', single_end:false ]
    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    NEXTCLADE ( [ meta, fasta ] )
 }
--- a/tests/modules/nextclade/run/main.nf
+++ b/tests/modules/nextclade/run/main.nf
@ -0,0 +1,22 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { NEXTCLADE_DATASETGET } from '../../../../modules/nextclade/datasetget/main.nf'
 include { NEXTCLADE_RUN        } from '../../../../modules/nextclade/run/main.nf'
 // Entry workflow for the run module test: fetches a pinned dataset, then runs
 // nextclade on the SARS-CoV-2 genome FASTA from the shared test data.
 workflow test_nextclade_run {
    // Sample tuple: [ meta map, FASTA file ]
    meta  = [ id:'test', single_end:false ]
    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Dataset name, reference accession, dataset version tag
    NEXTCLADE_DATASETGET ( 'sars-cov-2', 'MN908947', '2022-01-05T19:54:31Z' )

    NEXTCLADE_RUN ( [ meta, fasta ], NEXTCLADE_DATASETGET.out.dataset )
 }
--- a/tests/modules/nextclade/run/nextflow.config
+++ b/tests/modules/nextclade/run/nextflow.config
@ -0,0 +1,5 @@
 // Publishes outputs under <params.outdir>/<name>, where <name> is taken from the
 // process name: last ':'-separated component, then its first '_'-separated
 // token, lower-cased.
 process {
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
 }
--- a/tests/modules/nextclade/run/test.yml
+++ b/tests/modules/nextclade/run/test.yml
@ -0,0 +1,13 @@
 # Test case for the run module: runs the test_nextclade_run entry workflow and
 # lists the result files expected under output/nextclade.
 - name: nextclade run
  command: nextflow run ./tests/modules/nextclade/run -entry test_nextclade_run -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/run/nextflow.config
  tags:
    - nextclade
    - nextclade/run
  files:
    # No md5sum is given for test.json — presumably its content is not stable
    # between runs, so only its presence is checked; confirm.
    - path: output/nextclade/test.json
    - path: output/nextclade/test.csv
      md5sum: 3b87a4da190ba2e1fdc8418dc3a7ffdb
    - path: output/nextclade/test.tsv
      md5sum: 449393288e8734a02def139c550a8d9b
    - path: output/nextclade/test.tree.json
      md5sum: 9c6e33cb7ff860bee6194847bd2c855c
--- a/tests/modules/nextclade/test.yml
+++ b/tests/modules/nextclade/test.yml
@ -1,13 +0,0 @@
 # Test case for the single-step nextclade module: runs the test_nextclade entry
 # workflow and lists the result files expected under output/nextclade, each
 # with its expected md5 checksum.
 - name: nextclade test_nextclade
  command: nextflow run ./tests/modules/nextclade -entry test_nextclade -c ./tests/config/nextflow.config -c ./tests/modules/nextclade/nextflow.config
  tags:
    - nextclade
  files:
    - path: output/nextclade/test.json
      md5sum: cab92830c5cb66076e7d6c054ea98362
    - path: output/nextclade/test.csv
      md5sum: 4f7096df9be51f99a0d62a38653b29cf
    - path: output/nextclade/test.tsv
      md5sum: fe07dc4ffcd81742ca9bef93f88e8836
    - path: output/nextclade/test.tree.json
      md5sum: 5c57dd724bc2b5cfde8f42a17ff2865a