add nanolyse module (from nanoseq modules) (#471)

* add nanolyse modules * add clean.fastq.gz path and md5sum * fix errors * remove unreproducible md5sum * solve linting problem * address PR suggestions * GET_NANOLYSE_FASTA as a local module * Update software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update software/nanolyse/meta.yml Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update software/nanolyse/meta.yml Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update tests/software/nanolyse/test.yml Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update tests/software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update tests/software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * update to the version from nf-core/tools-dev * input and output files cannot have the same names * Update test.yml * Update software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update tests/software/nanolyse/main.nf Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Update test.yml * revert Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
2024-12-22 11:08:17 +00:00 · 2021-05-05 18:20:09 +08:00 · 2021-05-05 18:20:09 +08:00 · faf77d6fee
commit faf77d6fee
parent e3cf4c822c
6 changed files with 189 additions and 0 deletions
--- a/software/nanolyse/functions.nf
+++ b/software/nanolyse/functions.nf
@ -0,0 +1,70 @@
 /*
 * -----------------------------------------------------
 *  Utility functions used in nf-core DSL2 module files
 * -----------------------------------------------------
 */
 /*
 * Derive the lower-case tool name from a process name such as $task.process.
 * The last ':'-separated segment is taken, and within it the part before the
 * first '_' (e.g. 'WORKFLOW:BWA_MEM' -> 'bwa').
 */
 def getSoftwareName(task_process) {
    def scope_parts  = task_process.tokenize(':')
    def process_name = scope_parts[-1]
    def name_parts   = process_name.tokenize('_')
    return name_parts[0].toLowerCase()
 }
 /*
 * Build the Groovy Map of options understood by nf-core modules, filling in a
 * default for every key the caller left unset. 'publish_files' is copied as-is
 * (it may legitimately be null).
 */
 def initOptions(Map args) {
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix ?: ''
    ]
 }
 /*
 * Tidy up and join elements of a list to return a path string.
 * Null, empty and whitespace-only entries are dropped; every remaining entry
 * is trimmed and stripped of leading/trailing slashes before being joined
 * with '/'.
 */
 def getPathFromList(path_list) {
    // Groovy truth treats both null and '' as false, so null entries are skipped
    // instead of raising a NullPointerException on null.isEmpty()
    def paths = path_list.findAll { item -> item?.trim() }                  // Remove null/empty entries
    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
 }
 /*
 * Work out the publish path for a module output file (used as a publishDir
 * 'saveAs' closure). Returns null when the file should not be published:
 * '*.version.txt' files, files matching no 'publish_files' suffix, or when
 * 'publish_files' is set to something that is neither a Map nor null.
 */
 def saveFiles(Map args) {
    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }
    def opts     = initOptions(args.options)
    // Base directory: the options value wins over the one passed by the module
    def segments = [ opts.publish_dir ?: args.publish_dir ]
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!(args.meta && meta_key instanceof String)) {
                continue
            }
            // Default to the key itself; use the meta value when the key is present
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Anything that is not a plain String contributes an empty segment
            segments.add(segment instanceof String ? segment : '')
        }
    }
    if (opts.publish_files instanceof Map) {
        // First suffix (in map order) matching the filename selects the sub-directory
        def match = opts.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (match != null) {
            def target = segments + [ match.value ]
            return "${getPathFromList(target)}/$args.filename"
        }
        return null
    }
    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }
    return null
 }
--- a/software/nanolyse/main.nf
+++ b/software/nanolyse/main.nf
@ -0,0 +1,39 @@
 // Import generic module functions
 include { initOptions; saveFiles; getSoftwareName } from './functions'
 // Module options supplied by the including script (e.g. through addParams); empty map by default
 params.options = [:]
 // Copy of params.options with every expected key filled in by initOptions
 def options    = initOptions(params.options)
 // Filter reads in a gzipped FASTQ against a reference FASTA with NanoLyse.
 // Emits the filtered reads (gzipped), the renamed NanoLyse log and a version file.
 process NANOLYSE {
    tag "$meta.id"
    label 'process_low'
    // Publish path is computed per file by saveFiles: base directory is the tool name
    // derived from the process name, with a sub-directory keyed on meta 'id'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
    // Conda environment is only requested when params.enable_conda is set
    conda (params.enable_conda ? "bioconda::nanolyse=1.2.0" : null)
    // Use the Singularity image unless Singularity is asked to pull the Docker container instead
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/nanolyse:1.2.0--py_0"
    } else {
        container "quay.io/biocontainers/nanolyse:1.2.0--py_0"
    }
    input:
    // meta: sample information map (meta.id is used for tagging and output naming)
    // fastq: gzipped reads (decompressed with gunzip in the script)
    tuple val(meta), path(fastq)
    // fasta: reference passed to NanoLyse via -r
    path fasta
    output:
    tuple val(meta), path("*.fastq.gz"), emit: fastq
    path "*.log"                       , emit: log
    path "*.version.txt"               , emit: version
    script:
    def software = getSoftwareName(task.process)
    // Output prefix is '<meta.id>.<suffix>' when options.suffix is set, otherwise just '<meta.id>'
    // NOTE(review): without a suffix the output is '<meta.id>.fastq.gz'; if the staged input
    // carries that same name the redirect would presumably clobber it - confirm callers set a suffix
    def prefix   = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}"
    // Script steps: stream-decompress the reads through NanoLyse and re-compress the result,
    // rename NanoLyse.log to carry the prefix, then record the version with the
    // 'NanoLyse ' label stripped from the --version output
    """
    gunzip -c $fastq | NanoLyse -r $fasta | gzip > ${prefix}.fastq.gz
    mv NanoLyse.log ${prefix}.nanolyse.log
    echo \$(NanoLyse --version 2>&1) | sed -e "s/NanoLyse //g" > ${software}.version.txt
    """
 }
--- a/software/nanolyse/meta.yml
+++ b/software/nanolyse/meta.yml
@ -0,0 +1,46 @@
 name: nanolyse
 description: DNA contaminant removal using NanoLyse
 keywords:
    - contaminant_removal
 tools:
    - nanolyse:
        description: |
            DNA contaminant removal using NanoLyse
        homepage: https://github.com/wdecoster/nanolyse
        documentation: https://github.com/wdecoster/nanolyse#nanolyse
 input:
    - meta:
        type: map
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
    - fastq:
        type: file
        description: |
            Basecalled reads in FASTQ.GZ format
        pattern: "*.fastq.gz"
    - fasta:
        type: file
        description: |
            A reference fasta file against which to filter.
        pattern: "*.fasta"
 output:
    - meta:
        type: map
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
    - fastq:
        type: file
        description: Reads with contaminants removed in FASTQ.GZ format
        pattern: "*.fastq.gz"
    - log:
        type: file
        description: Log of the NanoLyse run.
        pattern: "*.log"
    - version:
        type: file
        description: File containing software version
        pattern: "*.{version.txt}"
 authors:
    - "@yuukiiwa"
--- a/tests/config/pytest_software.yml
+++ b/tests/config/pytest_software.yml
@ -403,6 +403,10 @@ multiqc:
  - software/multiqc/**
  - tests/software/multiqc/**
 nanolyse:
  - software/nanolyse/**
  - tests/software/nanolyse/**
 nanoplot:
  - software/nanoplot/**
  - tests/software/nanoplot/**
--- a/tests/software/nanolyse/main.nf
+++ b/tests/software/nanolyse/main.nf
@ -0,0 +1,24 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { NANOLYSE } from '../../../software/nanolyse/main.nf' addParams( options: [suffix: 'clean'] )
 // Test helper: downloads the gzipped lambda reference FASTA from the NanoLyse
 // GitHub repository with wget (needs network access) and emits it as 'fasta'.
 process GET_NANOLYSE_FASTA {
    output:
    // Picks up the downloaded lambda.fasta.gz
    path "*fasta.gz", emit: fasta
    script:
    """
    wget https://github.com/wdecoster/nanolyse/raw/master/reference/lambda.fasta.gz
    """
 }
 // Test entry point: run NANOLYSE on the sarscov2 nanopore FASTQ.GZ test file,
 // filtering against the reference emitted by GET_NANOLYSE_FASTA.
 workflow test_nanolyse {
    // Sample meta map
    def meta  = [ id:'test' ]
    // Single-element list holding the test reads; fails early if the file is missing
    def reads = [ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
    GET_NANOLYSE_FASTA()
    NANOLYSE ( [ meta, reads ], GET_NANOLYSE_FASTA.out.fasta )
 }
--- a/tests/software/nanolyse/test.yml
+++ b/tests/software/nanolyse/test.yml
@ -0,0 +1,6 @@
 - name: nanolyse
  command: nextflow run ./tests/software/nanolyse -entry test_nanolyse -c tests/config/nextflow.config
  tags:
    - nanolyse
  files:
    - path: ./output/nanolyse/test.clean.fastq.gz