Merge branch 'master' into add_compression_to_bam2fq

This commit is contained in:
Lauri Mesilaakso 2022-05-04 18:41:36 +03:00 committed by GitHub
commit e757966566
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 345 additions and 0 deletions

View file

@ -0,0 +1,42 @@
// Runs hap.py to compare a query VCF against a truth VCF and collects the
// CSV/JSON reports it writes under the chosen output prefix.
process HAPPY_HAPPY {
    tag "$meta.id"
    label 'process_medium'

    // NOTE: the tool version is hard-coded in the script section below; keep it
    // in sync with the conda and container pins when bumping them.
    conda (params.enable_conda ? "bioconda::hap.py=0.3.14" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hap.py:0.3.14--py27h5c5a3ab_0':
        'quay.io/biocontainers/hap.py:0.3.14--py27h5c5a3ab_0' }"

    input:
    // meta map, truth VCF, query VCF and the BED file handed to `-R`
    tuple val(meta), path(truth_vcf), path(query_vcf), path(bed)
    // fasta_fai is staged next to the FASTA but never named on the command line
    // (presumably hap.py looks the index up beside the reference -- confirm)
    tuple path(fasta), path(fasta_fai)

    output:
    tuple val(meta), path('*.csv'), path('*.json') , emit: metrics
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Hard-coded rather than queried from the tool; must match the pins above.
    def VERSION = '0.3.14'
    """
    hap.py \\
        $truth_vcf \\
        $query_vcf \\
        $args \\
        --reference $fasta \\
        --threads $task.cpus \\
        -R $bed \\
        -o $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        hap.py: $VERSION
    END_VERSIONS
    """
}

View file

@ -0,0 +1,67 @@
# Module metadata for the hap.py benchmarking process (HAPPY_HAPPY).
name: "happy_happy"
description: Hap.py is a tool to compare diploid genotypes at haplotype level. Rather than comparing VCF records row by row, hap.py will generate and match alternate sequences in a superlocus. A superlocus is a small region of the genome (sized between 1 and around 1000 bp) that contains one or more variants.
keywords:
  - happy
  - benchmark
  - haplotype
tools:
  - "happy":
      description: "Haplotype VCF comparison tools"
      homepage: "https://www.illumina.com/products/by-type/informatics-products/basespace-sequence-hub/apps/hap-py-benchmarking.html"
      documentation: "https://github.com/Illumina/hap.py"
      tool_dev_url: "https://github.com/Illumina/hap.py"
      doi: ""
      # A real YAML list, not a string that merely looks like one
      licence: ["BSD-2-clause"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - truth_vcf:
      type: file
      description: gold standard VCF file
      pattern: "*.{vcf,vcf.gz}"
  - query_vcf:
      type: file
      description: VCF/GVCF file to query
      pattern: "*.{vcf,vcf.gz}"
  - bed:
      type: file
      description: BED file
      pattern: "*.bed"
  - fasta:
      type: file
      description: FASTA file of the reference genome
      pattern: "*.{fa,fasta}"
  - fasta_fai:
      type: file
      description: The index of the reference FASTA
      pattern: "*.fai"

# The process emits the CSV and JSON files below together with the meta map
# in a single `metrics` channel.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - summary:
      type: file
      description: A CSV file containing the summary of the benchmarking
      pattern: "*.summary.csv"
  - extended:
      type: file
      description: A CSV file containing extended info of the benchmarking
      pattern: "*.extended.csv"
  - runinfo:
      type: file
      description: A JSON file containing the run info
      pattern: "*.runinfo.json"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@nvnieuwk"

View file

@ -0,0 +1,41 @@
// Runs pre.py (shipped with hap.py) to preprocess a VCF, writing the result
// to <prefix>.vcf.gz.
process HAPPY_PREPY {
    tag "$meta.id"
    label 'process_medium'

    // NOTE: the tool version is hard-coded in the script section below; keep it
    // in sync with the conda and container pins when bumping them.
    conda (params.enable_conda ? "bioconda::hap.py=0.3.14" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hap.py:0.3.14--py27h5c5a3ab_0':
        'quay.io/biocontainers/hap.py:0.3.14--py27h5c5a3ab_0' }"

    input:
    // meta map, the VCF to preprocess and the BED file handed to `-R`
    tuple val(meta), path(vcf), path(bed)
    // fasta_fai is staged next to the FASTA but never named on the command line
    // (presumably pre.py looks the index up beside the reference -- confirm)
    tuple path(fasta), path(fasta_fai)

    output:
    tuple val(meta), path('*.vcf.gz')  , emit: preprocessed_vcf
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Hard-coded rather than queried from the tool; must match the pins above.
    def VERSION = '0.3.14'
    """
    pre.py \\
        $args \\
        -R $bed \\
        --reference $fasta \\
        --threads $task.cpus \\
        $vcf \\
        ${prefix}.vcf.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pre.py: $VERSION
    END_VERSIONS
    """
}

View file

@ -0,0 +1,55 @@
# Module metadata for the pre.py preprocessing process (HAPPY_PREPY).
name: "happy_prepy"
description: Pre.py is a preprocessing tool made to preprocess VCF files for Hap.py
keywords:
  - happy
  - benchmark
  - haplotype
tools:
  - "happy":
      description: "Haplotype VCF comparison tools"
      homepage: "https://www.illumina.com/products/by-type/informatics-products/basespace-sequence-hub/apps/hap-py-benchmarking.html"
      documentation: "https://github.com/Illumina/hap.py"
      tool_dev_url: "https://github.com/Illumina/hap.py"
      doi: ""
      # A real YAML list, not a string that merely looks like one
      licence: ["BSD-2-clause"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: VCF file to preprocess
      pattern: "*.{vcf,vcf.gz}"
  - bed:
      type: file
      description: BED file
      pattern: "*.bed"
  - fasta:
      type: file
      description: FASTA file of the reference genome
      pattern: "*.{fa,fasta}"
  - fasta_fai:
      type: file
      description: The index of the reference FASTA
      pattern: "*.fai"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  # Key matches the emit name declared by the process
  - preprocessed_vcf:
      type: file
      description: A preprocessed VCF file
      pattern: "*.vcf.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@nvnieuwk"

View file

@ -891,6 +891,14 @@ hamronization/summarize:
- modules/hamronization/summarize/**
- tests/modules/hamronization/summarize/**
# Each entry maps a module tag to glob patterns covering that module's source
# directory and its test directory (presumably used to select which module
# tests to run when matching files change -- confirm against the CI setup).
happy/happy:
- modules/happy/happy/**
- tests/modules/happy/happy/**
happy/prepy:
- modules/happy/prepy/**
- tests/modules/happy/prepy/**
hicap:
- modules/hicap/**
- tests/modules/hicap/**

View file

@ -0,0 +1,39 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { HAPPY_HAPPY } from '../../../../modules/happy/happy/main.nf'
// Runs HAPPY_HAPPY with a plain VCF as truth and a gzipped VCF as query.
workflow test_happy_vcf {

    // Shorthand for the two test-data sections used below
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    // [ meta, truth VCF, query VCF, BED ]
    happy_input = [
        [ id:'test' ], // meta map
        file(illumina['test_rnaseq_vcf'], checkIfExists: true),
        file(illumina['test_genome21_indels_vcf_gz'], checkIfExists: true),
        file(genome['genome_bed'], checkIfExists: true)
    ]

    // [ FASTA, FASTA index ] wrapped in a value channel
    reference = Channel.value([
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true)
    ])

    HAPPY_HAPPY ( happy_input, reference )
}
// Runs HAPPY_HAPPY with the 'test_genome_vcf' file as the query
// (the gVCF case, going by this workflow's name).
workflow test_happy_gvcf {

    // Shorthand for the two test-data sections used below
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    // [ meta, truth VCF, query VCF, BED ]
    happy_input = [
        [ id:'test' ], // meta map
        file(illumina['test_rnaseq_vcf'], checkIfExists: true),
        file(illumina['test_genome_vcf'], checkIfExists: true),
        file(genome['genome_bed'], checkIfExists: true)
    ]

    // [ FASTA, FASTA index ] wrapped in a value channel
    reference = Channel.value([
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true)
    ])

    HAPPY_HAPPY ( happy_input, reference )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into <outdir>/<tool>, where <tool> is the lower-cased text before
    // the first '_' of the process name with any 'SCOPE:' prefixes removed.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }

}

View file

@ -0,0 +1,27 @@
# Expected outputs for the two HAPPY_HAPPY test workflows.
# test.runinfo.json is listed without an md5sum, so only its presence is
# checked (presumably its content varies between runs -- confirm).

- name: happy happy test_happy_vcf
  command: nextflow run tests/modules/happy/happy -entry test_happy_vcf -c tests/config/nextflow.config
  tags:
    - happy
    - happy/happy
  files:
    - path: output/happy/test.extended.csv
      md5sum: ef79c7c789ef4f146ca2e50dafaf22b3
    - path: output/happy/test.runinfo.json
    - path: output/happy/test.summary.csv
      md5sum: f8aa5d36d3c48dede2f607fd565894ad
    - path: output/happy/versions.yml
      md5sum: 82243bf6dbdc71aa63211ee2a89f47f2

- name: happy happy test_happy_gvcf
  command: nextflow run tests/modules/happy/happy -entry test_happy_gvcf -c tests/config/nextflow.config
  tags:
    - happy
    - happy/happy
  files:
    - path: output/happy/test.extended.csv
      md5sum: 3d5c21b67a259a3f6dcb088d55b86cd3
    - path: output/happy/test.runinfo.json
    - path: output/happy/test.summary.csv
      md5sum: 03044e9bb5a0c6f0947b7e910fc8a558
    - path: output/happy/versions.yml
      md5sum: 551fa216952d6f5de78e6e453b92aaab

View file

@ -0,0 +1,37 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { HAPPY_PREPY } from '../../../../modules/happy/prepy/main.nf'
// Runs HAPPY_PREPY on a gzipped VCF.
workflow test_happy_prepy_vcf {

    // Shorthand for the two test-data sections used below
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    // [ meta, VCF to preprocess, BED ]
    prepy_input = [
        [ id:'test' ], // meta map
        file(illumina['test_genome21_indels_vcf_gz'], checkIfExists: true),
        file(genome['genome_bed'], checkIfExists: true)
    ]

    // [ FASTA, FASTA index ] wrapped in a value channel
    reference = Channel.value([
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true)
    ])

    HAPPY_PREPY ( prepy_input, reference )
}
// Runs HAPPY_PREPY on the 'test_genome_vcf' file
// (the gVCF case, going by this workflow's name).
workflow test_happy_prepy_gvcf {

    // Shorthand for the two test-data sections used below
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    // [ meta, VCF to preprocess, BED ]
    prepy_input = [
        [ id:'test' ], // meta map
        file(illumina['test_genome_vcf'], checkIfExists: true),
        file(genome['genome_bed'], checkIfExists: true)
    ]

    // [ FASTA, FASTA index ] wrapped in a value channel
    reference = Channel.value([
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true)
    ])

    HAPPY_PREPY ( prepy_input, reference )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into <outdir>/<tool>, where <tool> is the lower-cased text before
    // the first '_' of the process name with any 'SCOPE:' prefixes removed.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }

}

View file

@ -0,0 +1,19 @@
# Expected outputs for the two HAPPY_PREPY test workflows.
# test.vcf.gz is listed without an md5sum, so only its presence is checked
# (presumably the compressed bytes are not reproducible -- confirm).

- name: happy prepy test_happy_prepy_vcf
  command: nextflow run tests/modules/happy/prepy -entry test_happy_prepy_vcf -c tests/config/nextflow.config
  tags:
    - happy/prepy
    - happy
  files:
    - path: output/happy/test.vcf.gz
    - path: output/happy/versions.yml
      md5sum: 814d20f1f29f23a3d21012748a5d6393

- name: happy prepy test_happy_prepy_gvcf
  command: nextflow run tests/modules/happy/prepy -entry test_happy_prepy_gvcf -c tests/config/nextflow.config
  tags:
    - happy/prepy
    - happy
  files:
    - path: output/happy/test.vcf.gz
    - path: output/happy/versions.yml
      md5sum: 970a54de46e68ef6d5228a26eaa4c8e7