adding SplitNCigarReads

2024-12-31 03:32:10 -05:00 · 2021-02-19 14:59:29 +01:00 · 2021-02-19 14:59:29 +01:00 · bb5c0ceca5
commit bb5c0ceca5
parent 0202162b17
6 changed files with 189 additions and 0 deletions
--- a/software/gatk4/splitncigarreads/functions.nf
+++ b/software/gatk4/splitncigarreads/functions.nf
@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ *  Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Derive the software tool name from a process name such as $task.process:
+ * keep the text after the last ':' and, within it, the text before the first '_',
+ * lower-cased.
+ */
+def getSoftwareName(task_process) {
+    def process_name = task_process.tokenize(':')[-1]   // drop any enclosing workflow scopes
+    def tool_name    = process_name.tokenize('_')[0]    // leading token before the first underscore
+    return tool_name.toLowerCase()
+}
+
+/*
+ * Build the Groovy Map of module options, substituting a default for every
+ * key the caller did not set (publish_files is passed through unchanged,
+ * so it stays null when absent).
+ */
+def initOptions(Map args) {
+    return [
+        args          : args.args ?: '',
+        args2         : args.args2 ?: '',
+        publish_by_id : args.publish_by_id ?: false,
+        publish_dir   : args.publish_dir ?: '',
+        publish_files : args.publish_files,
+        suffix        : args.suffix ?: ''
+    ]
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string.
+ *
+ * Null, empty and whitespace-only entries are dropped; every remaining entry
+ * is trimmed and stripped of leading and trailing slashes before the entries
+ * are joined with '/'.
+ */
+def getPathFromList(path_list) {
+    // Groovy truth treats both null and '' as false, so this is null-safe
+    // (the safe-navigation operator does not protect a chained .isEmpty() call)
+    def paths = path_list.findAll { item -> item?.trim() }
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace plus leading/trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Decide where (and whether) a module result file is published.
+ *
+ * Returns the relative publish path for args.filename, or null when the file
+ * must not be published: '*.version.txt' files are never published, and when
+ * options.publish_files is a Map only files whose name ends with one of its
+ * keys are published (into the sub-directory given by the matching value).
+ * Any other non-null publish_files value publishes nothing.
+ */
+def saveFiles(Map args) {
+    if (args.filename.endsWith('.version.txt')) {
+        return null
+    }
+
+    def ioptions = initOptions(args.options)
+
+    // Base directory: module option wins over the caller-supplied default
+    def base_dirs = [ ioptions.publish_dir ?: args.publish_dir ]
+    if (ioptions.publish_by_id) {
+        base_dirs.add(args.publish_id)
+    }
+
+    if (ioptions.publish_files == null) {
+        return "${getPathFromList(base_dirs)}/$args.filename"
+    }
+
+    if (ioptions.publish_files instanceof Map) {
+        // First entry (in map order) whose key is a suffix of the file name
+        def match = ioptions.publish_files.find { entry -> args.filename.endsWith(entry.key) }
+        if (match != null) {
+            def target_dirs = base_dirs.collect()
+            target_dirs.add(match.value)
+            return "${getPathFromList(target_dirs)}/$args.filename"
+        }
+    }
+
+    return null
+}
--- a/software/gatk4/splitncigarreads/main.nf
+++ b/software/gatk4/splitncigarreads/main.nf
@ -0,0 +1,43 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+// Module options default to an empty map; an including workflow can supply its own via addParams( options: ... )
+params.options = [:]
+// Fill in defaults for every option key that was left unset
+def options    = initOptions(params.options)
+
+/*
+ * Run GATK4 SplitNCigarReads on one alignment file.
+ *
+ * Inputs : [ meta, bam ] plus the reference fasta, its .fai index and its .dict.
+ * Outputs: '<prefix>.split_cigar.bam' (emit: bam) and '<software>.version.txt' (emit: version),
+ *          where <prefix> is meta.id, optionally followed by '.<options.suffix>'.
+ */
+process GATK4_SPLITNCIGARREADS {
+    tag "$meta.id"
+    label 'process_medium'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+    // Conda match specs pin a version with '=' ('channel::package=version'), not ':'
+    conda (params.enable_conda ? 'bioconda::gatk4=4.1.9.0' : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container 'https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0'
+    } else {
+        container 'quay.io/biocontainers/gatk4:4.1.9.0--py39_0'
+    }
+
+    input:
+    tuple val(meta), path(bam)
+    path(fasta)
+    // fai and dict are only staged into the work directory; they are not passed on the command line
+    // NOTE(review): presumably GATK locates them next to the fasta by name - confirm
+    path(fai)
+    path(dict)
+
+    output:
+    tuple val(meta), path('*.split_cigar.bam'), emit: bam
+    path  '*.version.txt'                     , emit: version
+
+    script:
+    def software = getSoftwareName(task.process)
+    def prefix   = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}"
+    """
+    gatk SplitNCigarReads \\
+        -R $fasta \\
+        -I $bam \\
+        -O ${prefix}.split_cigar.bam \\
+        $options.args
+
+    # Record the GATK version itself (not the bundled Picard version)
+    echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt
+    """
+}
--- a/software/gatk4/splitncigarreads/meta.yml
+++ b/software/gatk4/splitncigarreads/meta.yml
@ -0,0 +1,60 @@
+name: gatk4_splitncigarreads
+description: Splits reads that contain Ns in their cigar string
+keywords:
+  - bam
+  - split
+  - cigar
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
+      doi: 10.1158/1538-7445.AM2017-3590
+params:
+  - outdir:
+      type: string
+      description: |
+        The pipeline's output directory. By default, the module will
+        output files into `$params.outdir/<SOFTWARE>`
+  - publish_dir_mode:
+      type: string
+      description: |
+        Value for the Nextflow `publishDir` mode parameter.
+        Available: symlink, rellink, link, copy, copyNoFollow, move.
+  - enable_conda:
+      type: boolean
+      description: |
+        Run the module with Conda using the software specified
+        via the `conda` directive
+  - singularity_pull_docker_container:
+      type: boolean
+      description: |
+        Instead of directly downloading Singularity images for use with Singularity,
+        force the workflow to pull and convert Docker containers instead.
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+  - bam:
+      type: file
+      description: BAM/SAM/CRAM file containing reads
+      pattern: "*.{bam,sam,cram}"
+  - fasta:
+      type: file
+      description: Reference sequence file
+      pattern: "*.{fasta,fa,fna}"
+  - fai:
+      type: file
+      description: Index (.fai) of the reference sequence file
+      pattern: "*.fai"
+  - dict:
+      type: file
+      description: Sequence dictionary (.dict) of the reference sequence file
+      pattern: "*.dict"
+output:
+  - bam:
+      type: file
+      description: BAM file with split reads
+      pattern: "*.split_cigar.bam"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.version.txt"
+authors:
+  - "@kevinmenden"
--- a/tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna.fai
+++ b/tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna.fai
@ -0,0 +1 @@
+MT192765.1	29829	120	80	81
--- a/tests/software/gatk4/splitncigarreads/main.nf
+++ b/tests/software/gatk4/splitncigarreads/main.nf
@ -0,0 +1,18 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK4_SPLITNCIGARREADS } from '../../../../software/gatk4/splitncigarreads/main.nf' addParams( options: [:] )
+
+/*
+ * Smoke test: run GATK4_SPLITNCIGARREADS on the bundled test BAM with its
+ * reference fasta, index and sequence dictionary.
+ */
+workflow test_gatk4_splitncigarreads {
+
+    // The process takes a single alignment file per sample, so the BAM is passed directly
+    def input = [ [ id:'test' ], // meta map
+                  file("${launchDir}/tests/data/bam/test-sc2-artic-v3-sorted-trimmed.bam", checkIfExists: true) ]
+
+    // All test data is resolved against launchDir, consistently with the BAM above
+    fasta = file("${launchDir}/tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna", checkIfExists: true)
+    fai = file("${launchDir}/tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna.fai", checkIfExists: true)
+    dict = file("${launchDir}/tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.dict", checkIfExists: true)
+
+    GATK4_SPLITNCIGARREADS ( input, fasta, fai, dict )
+}
--- a/tests/software/gatk4/splitncigarreads/test.yml
+++ b/tests/software/gatk4/splitncigarreads/test.yml
@ -0,0 +1,8 @@
+- name: gatk4 splitncigarreads
+  command: nextflow run ./tests/software/gatk4/splitncigarreads -entry test_gatk4_splitncigarreads -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4_splitncigarreads
+  files:
+    # The module publishes '<meta.id>.split_cigar.bam' under the 'gatk4' directory.
+    # No md5sum is listed: the previous value belonged to a different module's VCF output.
+    # TODO: add the checksum of the real output once it has been generated.
+    - path: output/gatk4/test.split_cigar.bam