Tool/crosscheckfingerprints (#1505)

* first commit

* first commit

* update test.yml

* update test.yml

* Update modules/picard/crosscheckfingerprints/main.nf

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>

* Update modules/picard/crosscheckfingerprints/main.nf

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>

* add support for vcf haplotype maps

* update test

* update test data config, use test data

* fix exit code

* Update modules/picard/crosscheckfingerprints/main.nf

Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com>

* Update modules/picard/crosscheckfingerprints/main.nf

Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com>

* remove unused stub

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com>
This commit is contained in:
Matthias De Smet 2022-04-08 16:02:10 +02:00 committed by GitHub
parent e19a9a2474
commit d4160c669b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 136 additions and 0 deletions

View file

@ -0,0 +1,51 @@
// Runs `picard CrosscheckFingerprints` on one or two sets of input files using
// a haplotype map, writing `<prefix>.crosscheck_metrics.txt` plus a
// `versions.yml` that records the Picard version.
//
// Inputs:
//   meta, input1   - sample meta map and the file(s) passed as --INPUT
//   input2         - optional file(s) passed as --SECOND_INPUT ([] to omit)
//   haplotype_map  - file passed as --HAPLOTYPE_MAP
// Outputs:
//   crosscheck_metrics - tuple of meta and the metrics text file
//   versions           - versions.yml
process PICARD_CROSSCHECKFINGERPRINTS {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::picard=2.26.10" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' :
        'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }"

    input:
    tuple val(meta), path(input1)
    path input2
    path haplotype_map

    output:
    tuple val(meta), path("*.crosscheck_metrics.txt"), emit: crosscheck_metrics
    path "versions.yml"                              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The leading "--INPUT" for the first file is written in the command
    // template below; join() only supplies the flag for subsequent files.
    def input1_string = input1.join(" --INPUT ")
    // input2 is optional: an empty value yields no --SECOND_INPUT flags at all.
    def input2_string = input2 ? "--SECOND_INPUT " + input2.join(" --SECOND_INPUT ") : ""

    // JVM heap size in megabytes. Working in MB (rather than whole GB) avoids
    // rendering "-Xmx0g" when the task is given less than 1 GB, and taking 80%
    // of the allocation leaves headroom for non-heap JVM memory.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[Picard CrosscheckFingerprints] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega * 0.8).intValue()
    }
    """
    picard \\
        -Xmx${avail_mem}M \\
        CrosscheckFingerprints \\
        $args \\
        --NUM_THREADS ${task.cpus} \\
        --INPUT $input1_string \\
        $input2_string \\
        --HAPLOTYPE_MAP ${haplotype_map} \\
        --OUTPUT ${prefix}.crosscheck_metrics.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$( picard CrosscheckFingerprints --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d: )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,53 @@
# Module metadata for the picard crosscheckfingerprints process: describes the
# wrapped tool, the input channels and the emitted outputs.
name: "picard_crosscheckfingerprints"
description: Checks that all data in the set of input files appear to come from the same individual
keywords:
  - alignment
  - metrics
  - statistics
  - fingerprint
  - bam
tools:
  - picard:
      description: |
        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
        data and formats such as SAM/BAM/CRAM and VCF.
      homepage: https://broadinstitute.github.io/picard/
      documentation: https://broadinstitute.github.io/picard/
      tool_dev_url: https://github.com/broadinstitute/picard/
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input1:
      type: file
      description: List containing 1 or more bam/vcf files or a file containing filepaths
      pattern: "*.{bam,vcf,vcf.gz,txt,fofn}"
  - input2:
      type: file
      description: Optional list containing 1 or more bam/vcf files or a file containing filepaths
      pattern: "*.{bam,vcf,vcf.gz,txt,fofn}"
  - haplotype_map:
      type: file
      description: Haplotype map file
      pattern: "*.{txt,vcf,vcf.gz}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - crosscheck_metrics:
      type: file
      description: Metrics created by crosscheckfingerprints
      # Plain glob matching the process output declaration; braces are only
      # meaningful when they list more than one alternative.
      pattern: "*.crosscheck_metrics.txt"

authors:
  - "@matthdsm"

View file

@ -1343,6 +1343,10 @@ picard/createsequencedictionary:
- modules/picard/createsequencedictionary/** - modules/picard/createsequencedictionary/**
- tests/modules/picard/createsequencedictionary/** - tests/modules/picard/createsequencedictionary/**
picard/crosscheckfingerprints:
- modules/picard/crosscheckfingerprints/**
- tests/modules/picard/crosscheckfingerprints/**
picard/filtersamreads: picard/filtersamreads:
- modules/picard/filtersamreads/** - modules/picard/filtersamreads/**
- tests/modules/picard/filtersamreads/** - tests/modules/picard/filtersamreads/**

View file

@ -161,6 +161,7 @@ params {
gnomad_r2_1_1_21_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi" gnomad_r2_1_1_21_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi"
mills_and_1000g_indels_21_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" mills_and_1000g_indels_21_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz"
mills_and_1000g_indels_21_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi" mills_and_1000g_indels_21_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi"
haplotype_map = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/haplotype_map.txt"
index_salmon = "${test_data_dir}/genomics/homo_sapiens/genome/index/salmon" index_salmon = "${test_data_dir}/genomics/homo_sapiens/genome/index/salmon"
repeat_expansions = "${test_data_dir}/genomics/homo_sapiens/genome/loci/repeat_expansions.json" repeat_expansions = "${test_data_dir}/genomics/homo_sapiens/genome/loci/repeat_expansions.json"

View file

@ -0,0 +1,14 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { PICARD_CROSSCHECKFINGERPRINTS } from '../../../../modules/picard/crosscheckfingerprints/main.nf'
// Test entry point: runs PICARD_CROSSCHECKFINGERPRINTS on two sorted
// paired-end BAM files with no second input set and the test haplotype map.
workflow test_picard_crosscheckfingerprints {

    // Both BAM files are handed to the process together as `input1`.
    bam_files = [
        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    ]

    // [ meta map, list of input files ]
    input = [ [ id:'test', single_end:false ], bam_files ]

    haplotype_map = file(params.test_data['homo_sapiens']['genome']['haplotype_map'], checkIfExists: true)

    // The empty list leaves the optional `input2` channel unset.
    PICARD_CROSSCHECKFINGERPRINTS ( input, [], haplotype_map )
}

View file

@ -0,0 +1,5 @@
// Process-scope settings used when running the crosscheckfingerprints test.
process {

    // Output directory is derived from the process name: the last
    // ':'-separated component, then its first '_'-separated token, lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    withName: PICARD_CROSSCHECKFINGERPRINTS {
        // Extra command-line arguments picked up by the process via task.ext.args.
        // NOTE(review): presumably set so a fingerprint mismatch in the test
        // data does not fail the task - confirm against Picard's documentation.
        ext.args = "--EXIT_CODE_WHEN_MISMATCH 0"
    }

}

View file

@ -0,0 +1,8 @@
# Test case: runs the test_picard_crosscheckfingerprints entry workflow and
# lists the files expected under output/picard/.
- name: "picard crosscheckfingerprints"
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/picard/crosscheckfingerprints
    -entry test_picard_crosscheckfingerprints
    -c ./tests/config/nextflow.config
    -c ./tests/modules/picard/crosscheckfingerprints/nextflow.config
  tags:
    - "picard"
    - "picard/crosscheckfingerprints"
  files:
    - path: "output/picard/test.crosscheck_metrics.txt"
    - path: "output/picard/versions.yml"