Picard createsequencedictionary (#1310)

* add picard/createsequencedictionary module

* add picard-CreateSequenceDictionary

* add picard/createsequencedictionary/

* add contains to test yml

* update test yml contains

* update test yml contains

Co-authored-by: Peri <rrx8@cdc.gov>
Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>
This commit is contained in:
Sateesh 2022-02-18 20:49:12 -05:00 committed by GitHub
parent f655e5dea2
commit 62e5d1f0b3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 121 additions and 0 deletions

View file

@ -0,0 +1,42 @@
// Builds a Picard sequence dictionary (`<prefix>.dict`) for a reference FASTA.
//
// Input : tuple of the sample meta map and the reference FASTA path.
// Output: reference_dict - tuple of the meta map and the generated *.dict file
//         versions       - versions.yml recording the Picard version used
//
// `task.ext.args` is forwarded verbatim to CreateSequenceDictionary and
// `task.ext.prefix` overrides the output basename (defaults to meta.id).
process PICARD_CREATESEQUENCEDICTIONARY {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::picard=2.26.9" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:2.26.9--hdfd78af_0' :
        'quay.io/biocontainers/picard:2.26.9--hdfd78af_0' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("*.dict"), emit: reference_dict
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // JVM heap size in megabytes. Megabytes are used instead of whole gigabytes
    // because `task.memory.giga` truncates, so an allocation below 1 GB would
    // produce the unusable flag `-Xmx0g`. Default stays at 3 GB (3072 MB).
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[Picard CreateSequenceDictionary] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        // Leave ~20% of the allocation for JVM non-heap memory so the job is
        // not killed for exceeding the memory requested from the executor.
        avail_mem = (task.memory.mega * 0.8).intValue()
    }
    """
    picard \\
        -Xmx${avail_mem}M \\
        CreateSequenceDictionary \\
        $args \\
        R=$fasta \\
        O=${prefix}.dict

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(picard CreateSequenceDictionary --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,45 @@
# Module description for picard/createsequencedictionary.
# Output entries are named after the channels the process emits
# (`reference_dict`, `versions`).
name: picard_createsequencedictionary
description: Creates a sequence dictionary for a reference sequence.
keywords:
  - sequence
  - dictionary
  - picard
tools:
  - picard:
      description: |
        Creates a sequence dictionary file (with ".dict" extension) from a reference sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a header but no SAMRecords, and the header contains only sequence records.
      homepage: https://broadinstitute.github.io/picard/
      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036712531-CreateSequenceDictionary-Picard-
      tool_dev_url: https://github.com/broadinstitute/picard
      licence: ['MIT']
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: The reference fasta file
      pattern: "*.fasta"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Key matches the process's `emit: reference_dict`; pattern matches its `*.dict` output glob.
  - reference_dict:
      type: file
      description: picard dictionary file
      pattern: "*.dict"
authors:
  - "@sateeshperi"
  - "@mjcipriano"
  - "@hseabolt"

View file

@ -1169,6 +1169,10 @@ picard/collectwgsmetrics:
- modules/picard/collectwgsmetrics/**
- tests/modules/picard/collectwgsmetrics/**
# Maps the picard/createsequencedictionary tag to the module and test path globs.
# NOTE(review): presumably used by CI to decide which module tests to run when these paths change — confirm.
picard/createsequencedictionary:
- modules/picard/createsequencedictionary/**
- tests/modules/picard/createsequencedictionary/**
picard/filtersamreads:
- modules/picard/filtersamreads/**
- tests/modules/picard/filtersamreads/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../../modules/picard/createsequencedictionary/main.nf'
// Test entry point: runs PICARD_CREATESEQUENCEDICTIONARY on the sarscov2
// genome FASTA listed in params.test_data.
workflow test_picard_createsequencedictionary {

    // Meta map identifying the sample
    sample_meta = [ id:'test' ]

    // Reference FASTA; fails early if the file does not exist
    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    PICARD_CREATESEQUENCEDICTIONARY ( [ sample_meta, genome_fasta ] )
}

View file

@ -0,0 +1,5 @@
// Publish each process's outputs under <outdir>/<tool>, where <tool> is the
// lower-cased first '_'-separated token of the last ':'-separated component
// of the process name.
process {
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }
}

View file

@ -0,0 +1,10 @@
# Test case: runs the test_picard_createsequencedictionary entry workflow and
# checks the files it publishes under output/picard/.
- name: picard createsequencedictionary test_picard_createsequencedictionary
  command: nextflow run tests/modules/picard/createsequencedictionary -entry test_picard_createsequencedictionary -c tests/config/nextflow.config
  tags:
    - picard/createsequencedictionary
    - picard
  files:
    # The dictionary is checked by content rather than by checksum.
    - path: output/picard/test.dict
      contains:
        - 'SN:MT192765.1'
    - path: output/picard/versions.yml
      md5sum: b3d8c7ea65b8a6d3237b153d13fe2014