Add docs and tests for markduplicates

2024-12-22 11:08:17 +00:00 · 2020-08-07 16:30:28 +01:00 · 2020-08-07 16:30:28 +01:00 · 3547b64eab
commit 3547b64eab
parent cb85acddf0
6 changed files with 119 additions and 0 deletions
--- a/software/picard/markduplicates/meta.yml
+++ b/software/picard/markduplicates/meta.yml
@ -0,0 +1,67 @@
+name: picard_markduplicates
+description: Locate and tag duplicate reads in a BAM file
+keywords:  # NOTE(review): sam/cram are listed, but the only declared input is a BAM file — confirm
+    - markduplicates
+    - pcr
+    - duplicates
+    - bam
+    - sam
+    - cram
+tools:  # software this module relies on, with upstream links
+    - picard:
+        description: |
+            A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
+            data and formats such as SAM/BAM/CRAM and VCF.
+        homepage: https://broadinstitute.github.io/picard/
+        documentation: https://broadinstitute.github.io/picard/  # NOTE(review): same URL as homepage — confirm no dedicated docs link is intended
+params:  # pipeline-level settings, referenced as `$params.<name>`
+    - outdir:
+        type: string
+        description: |
+            The pipeline's output directory. By default, the module will
+            output files into `$params.outdir/<SOFTWARE>`
+    - publish_dir_mode:
+        type: string
+        description: |
+            Value for the Nextflow `publishDir` mode parameter.
+            Available: symlink, rellink, link, copy, copyNoFollow, move.
+    - conda:
+        type: boolean  # toggle, not an environment name or path
+        description: |
+            Run the module with Conda using the software specified
+            via the `conda` directive
+input:  # what the process receives: sample meta map, a BAM file, and an options map
+    - meta:
+        type: map
+        description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+    - bam:
+        type: file
+        description: BAM file
+        pattern: "*.{bam}"  # brace group with a single alternative, i.e. *.bam
+    - options:
+        type: map
+        description: |
+            Groovy Map containing module options for passing command-line arguments and
+            output file paths.
+output:  # what the process emits: sample meta map plus three file outputs
+    - meta:
+        type: map
+        description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+    - bam:
+        type: file
+        description: BAM file with duplicate reads marked/removed
+        pattern: "*.{bam}"  # brace group with a single alternative, i.e. *.bam
+    - metrics:
+        type: file
+        description: Duplicate metrics file generated by picard
+        pattern: "*.{metrics.txt}"  # i.e. names ending in .metrics.txt
+    - version:
+        type: file
+        description: File containing software version
+        pattern: "*.{version.txt}"  # i.e. names ending in .version.txt
+authors:
+    - "@drpatelh"
--- a/software/picard/markduplicates/test/input/test.paired_end.sorted.bam
+++ b/software/picard/markduplicates/test/input/test.paired_end.sorted.bam
@ -0,0 +1 @@
+../../../../../tests/data/bam/test.paired_end.sorted.bam
--- a/software/picard/markduplicates/test/main.nf
+++ b/software/picard/markduplicates/test/main.nf
@ -0,0 +1,18 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { PICARD_MARKDUPLICATES } from '../main.nf'
+
+workflow test {
+    // Sample meta map and the sorted paired-end BAM resolved under ${baseDir}/input
+    def meta = [ id:'test', single_end:false ]
+    def bam  = file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true)
+
+    // Second argument is an empty options map
+    PICARD_MARKDUPLICATES ( [ meta, bam ], [:] )
+}
+
+workflow {    // unnamed entry workflow; its only step is to invoke `test`
+    test()
+}
--- a/software/picard/markduplicates/test/nextflow.config
+++ b/software/picard/markduplicates/test/nextflow.config
@ -0,0 +1,20 @@
+
+params {
+    outdir = "output/"           // pipeline output directory, given as a relative path
+    publish_dir_mode = "copy"    // value for the Nextflow `publishDir` mode
+    conda = false                // off by default; the `conda` profile sets it to true
+}
+
+profiles {
+    conda  {    // flips params.conda on
+        params.conda = true
+    }
+    docker {    // enables the Docker scope
+        docker.enabled = true
+        docker.runOptions = '-u \$(id -u):\$(id -g)'    // `\$` keeps a literal `$`; presumably expanded by the shell so the container runs with the caller's uid:gid — confirm
+    }
+    singularity {    // enables the Singularity scope with autoMounts switched on
+        singularity.enabled = true
+        singularity.autoMounts = true
+    }
+}
--- a/software/picard/markduplicates/test/output/picard/test.MarkDuplicates.metrics.txt
+++ b/software/picard/markduplicates/test/output/picard/test.MarkDuplicates.metrics.txt
@ -0,0 +1,13 @@
+## htsjdk.samtools.metrics.StringHeader
+# MarkDuplicates INPUT=[test.paired_end.sorted.bam] OUTPUT=test.bam METRICS_FILE=test.MarkDuplicates.metrics.txt    MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 SORTING_COLLECTION_SIZE_RATIO=0.25 TAG_DUPLICATE_SET_MEMBERS=false REMOVE_SEQUENCING_DUPLICATES=false TAGGING_POLICY=DontTag CLEAR_DT=true DUPLEX_UMI=false ADD_PG_TAG_TO_READS=true REMOVE_DUPLICATES=false ASSUME_SORTED=false DUPLICATE_SCORING_STRATEGY=SUM_OF_BASE_QUALITIES PROGRAM_RECORD_ID=MarkDuplicates PROGRAM_GROUP_NAME=MarkDuplicates READ_NAME_REGEX=<optimized capture of last three ':' separated fields as numeric values> OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 MAX_OPTICAL_DUPLICATE_SET_SIZE=300000 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Fri Aug 07 15:11:32 GMT 2020
+
+## METRICS CLASS	picard.sam.DuplicationMetrics
+LIBRARY	UNPAIRED_READS_EXAMINED	READ_PAIRS_EXAMINED	SECONDARY_OR_SUPPLEMENTARY_RDS	UNMAPPED_READS	UNPAIRED_READ_DUPLICATES	READ_PAIR_DUPLICATES	READ_PAIR_OPTICAL_DUPLICATES	PERCENT_DUPLICATION	ESTIMATED_LIBRARY_SIZE
+Unknown Library	0	10000	0	0	0	0	0	0	
+
+## HISTOGRAM	java.lang.Double
+set_size	all_sets	non_optical_sets
+1.0	10000	10000
+
--- a/software/picard/markduplicates/test/output/picard/test.bam
+++ b/software/picard/markduplicates/test/output/picard/test.bam
@ -0,0 +1 @@
+../../../../../tests/data/bam/test.paired_end.sorted.bam