luslab-umitools | Module feature complete

2024-12-22 19:18:17 +00:00 · 2020-06-19 08:21:55 +01:00 · 2020-06-19 08:21:55 +01:00 · 9945ac88e0
commit 9945ac88e0
parent 436d6a6292
4 changed files with 49 additions and 45 deletions
--- a/tools/umi_tools/dedup.yml
+++ b/tools/umi_tools/dedup.yml
@ -1,38 +0,0 @@
-name: HISAT2 
-description: Graph-based alignment of next generation sequencing reads to a population of genomes
-keywords:
-    - Alignment
-    - Short reads
-    - graph FM Index (GFM)
-    - RNA-seq
-tools:
-    - fastqc:
-        description: |
-            HISAT2 is a fast and sensitive alignment program for mapping next-generation
-            sequencing reads (whole-genome, transcriptome, and exome sequencing data)
-            against the general human population (as well as against a single reference genome).
-            Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
-            graph FM index (GFM).
-        homepage: http://daehwankimlab.github.io/hisat2/
-        documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
-input:
-    -
-        - sample_id:
-            type: string
-            description: Sample identifier
-        - reads:
-            type: file
-            description: Input FastQ file, or pair of files
-output:
-    -
-        - report:
-            type: file
-            description: mapping statistics report
-            pattern: *hisat2_stats.txt
-        - alignment:
-            type: file
-            description: alignment file in BAM format
-            pattern: *hisat2.bam
-authors:
-    - @FelixKrueger
-
--- a/tools/umi_tools/main.nf
+++ b/tools/umi_tools/main.nf
@ -17,7 +17,8 @@ process umitools_dedup {
      tuple val(sample_id), path(bai), path(bam)
       
    output:
-      tuple val(sample_id), path(bam), emit: dedupBam
+      tuple val(sample_id), path("*.dedup.bam"), emit: dedupBam
+      path "*.dedup.log", emit: report

    shell:

@ -30,16 +31,17 @@ process umitools_dedup {
    // Check main args string exists and strip whitespace
    if(params.umitools_dedup_args) {
        internal_args = params.umitools_dedup_args
-        internal_args = internal_args.trim() + " "
+        internal_args = internal_args.trim() + " --log=${sample_id}.dedup.log "
        args_exist = "true"
    }
    else {
        error_message = "params.umitools_dedup_args does not exist, please define to run this module."
    }

+    //SHELL
    """
    if ${args_exist}; then
-        ${internal_prog}${internal_args}-I $bam 
+        ${internal_prog}${internal_args}-I $bam -S ${sample_id}.dedup.bam --output-stats=${sample_id}
    else
        echo "${error_message}" 1>&2
        exit 1
--- a/tools/umi_tools/test/main.nf
+++ b/tools/umi_tools/test/main.nf
@ -23,10 +23,6 @@ include umitools_dedup from '../main.nf'
 /* Define input channels
 --------------------------------------------------------------------------------------*/

- //fileName=`basename $bam`
- //   sampleName="\${fileName%.Aligned.sortedByCoord.out.bam}"
-  //  umi_tools dedup --umi-separator=":" -I $bam -S \${sampleName}.dedup.bam --output-stats=\${sampleName}
-
 // Meta data
 testMetaDataBam = [
  ['sample1', "$baseDir/input/sample1.bam"],
--- a/tools/umi_tools/umi_tools.yml
+++ b/tools/umi_tools/umi_tools.yml
@ -0,0 +1,44 @@
+name: umi_tools
+version: "1.0"  # quoted: a bare 1.0 is loaded as a float, not a version string
+description: Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
+keywords:
+    - UMI
+    - RMT
+    - Barcode
+tools:
+    - umi_tools:
+        description: |
+            Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
+        homepage: https://github.com/CGATOxford/UMI-tools
+        documentation: https://umi-tools.readthedocs.io/en/latest/
+processes:
+    - dedup:
+        opperation: |  # NOTE(review): misspelled key kept unchanged in case tooling reads it - confirm before renaming to "operation"
+            Set command args to params.umitools_dedup_args
+            The program will execute with the following pattern: umi_tools dedup ARGS --log=SAMPLE_ID.dedup.log -I $bam -S SAMPLE_ID.dedup.bam --output-stats=SAMPLE_ID
+        description: |
+            Groups PCR duplicates and deduplicates reads to yield one read per group.
+            Use this when you want to remove the PCR duplicates prior to any downstream analysis.
+        input:
+            - sample_id:
+                type: string
+                description: Sample identifier
+            - bai:
+                type: file
+                description: BAM file index
+            - bam:
+                type: file
+                description: BAM sequence file
+        output:
+            - dedupBam:
+                type: tuple
+                description: A tuple of sample id and output bam file
+                pattern: '*SAMPLE_ID.dedup.bam'  # quoted: a leading * is parsed as a YAML alias
+            - report:
+                type: file
+                description: Log file for the umi_tools operation
+                pattern: '*SAMPLE_ID.dedup.log'  # quoted: a leading * is parsed as a YAML alias
+authors:
+    - '@candiceh08'  # quoted: @ is a reserved indicator and cannot start a plain scalar
+    - '@chris-cheshire'
+