mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
New module: elprep filter (#1524)
* first commit * syntax fix * fix input * output sam during test for md5sum * replace md5sum with contains * add new test data, add extra in/outputs * cli fixes * fix outputs * Update modules/elprep/filter/main.nf Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> * Update modules/elprep/filter/meta.yml Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> * Update modules/elprep/filter/meta.yml Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> * fix suggestions by @jfy133 * Bit more verbose explanation for bool vals * define variables * fix prettier Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
parent
90b203d3e9
commit
9e3daae8ef
7 changed files with 240 additions and 0 deletions
89
modules/elprep/filter/main.nf
Normal file
89
modules/elprep/filter/main.nf
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
// Runs `elprep filter` on one SAM/BAM file and writes `<prefix>.bam`, or
// `<prefix>.sam` when `ext.args` contains "--output-type sam".
//
// Every optional command-line flag below is emitted only when the matching
// input is truthy; callers pass `[]` (or `false`) to leave a feature off.
// Free-form elprep options are supplied through `task.ext.args`.
process ELPREP_FILTER {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
        'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"

    input:
    tuple val(meta), path(bam)      // sample meta map and the SAM/BAM file to process
    val(run_haplotypecaller)        // truthy: adds --haplotypecaller <prefix>.g.vcf.gz
    val(run_bqsr)                   // truthy: adds --bqsr <prefix>.recall
    path(reference_sequences)       // optional: passed to --replace-reference-sequences
    path(filter_regions_bed)        // optional: passed to --filter-non-overlapping-reads
    path(reference_elfasta)         // optional: passed to --reference
    path(known_sites_elsites)       // optional: passed to --known-sites
    path(target_regions_bed)        // optional: passed to --target-regions
    path(intermediate_bqsr_tables)  // optional: when staged, adds --bqsr-apply . (the task directory)
    val(bqsr_tables_only)           // truthy: adds --bqsr-tables-only <prefix>.table
    val(get_activity_profile)       // truthy: adds --activity-profile <prefix>.activity_profile.igv
    val(get_assembly_regions)       // truthy: adds --assembly-regions <prefix>.assembly_regions.igv

    output:
    tuple val(meta), path("**.{bam,sam}")           ,emit: bam
    tuple val(meta), path("*.metrics.txt")          ,optional: true, emit: metrics
    tuple val(meta), path("*.recall")               ,optional: true, emit: recall
    tuple val(meta), path("*.vcf.gz")               ,optional: true, emit: gvcf
    tuple val(meta), path("*.table")                ,optional: true, emit: table
    tuple val(meta), path("*.activity_profile.igv") ,optional: true, emit: activity_profile
    tuple val(meta), path("*.assembly_regions.igv") ,optional: true, emit: assembly_regions
    path "versions.yml"                             ,emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The output extension follows the output type requested through ext.args.
    def suffix = args.contains("--output-type sam") ? "sam" : "bam"

    // filter args
    def reference_sequences_cmd = reference_sequences ? " --replace-reference-sequences ${reference_sequences}" : ""
    def filter_regions_cmd = filter_regions_bed ? " --filter-non-overlapping-reads ${filter_regions_bed}" : ""

    // markdup args
    // The metrics file is only requested when ext.args enables duplicate marking.
    def markdup_cmd = args.contains("--mark-duplicates") ? " --mark-optical-duplicates ${prefix}.metrics.txt": ""

    // variant calling args
    def haplotyper_cmd = run_haplotypecaller ? " --haplotypecaller ${prefix}.g.vcf.gz": ""

    def fasta_cmd = reference_elfasta ? " --reference ${reference_elfasta}": ""
    def known_sites_cmd = known_sites_elsites ? " --known-sites ${known_sites_elsites}": ""
    def target_regions_cmd = target_regions_bed ? " --target-regions ${target_regions_bed}": ""

    // bqsr args
    def bqsr_cmd = run_bqsr ? " --bqsr ${prefix}.recall": ""
    def bqsr_tables_only_cmd = bqsr_tables_only ? " --bqsr-tables-only ${prefix}.table": ""

    // The tables are staged into the task directory, so elprep is pointed at ".".
    def intermediate_bqsr_cmd = intermediate_bqsr_tables ? " --bqsr-apply .": ""

    // misc
    def activity_profile_cmd = get_activity_profile ? " --activity-profile ${prefix}.activity_profile.igv": ""
    def assembly_regions_cmd = get_assembly_regions ? " --assembly-regions ${prefix}.assembly_regions.igv": ""

    // Fail fast instead of asking elprep to write onto its own staged input file.
    if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    """
    elprep filter ${bam} ${prefix}.${suffix} \\
        ${reference_sequences_cmd} \\
        ${filter_regions_cmd} \\
        ${markdup_cmd} \\
        ${haplotyper_cmd} \\
        ${fasta_cmd} \\
        ${known_sites_cmd} \\
        ${target_regions_cmd} \\
        ${bqsr_cmd} \\
        ${bqsr_tables_only_cmd} \\
        ${intermediate_bqsr_cmd} \\
        ${activity_profile_cmd} \\
        ${assembly_regions_cmd} \\
        --nr-of-threads ${task.cpus} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
    END_VERSIONS
    """
}
|
106
modules/elprep/filter/meta.yml
Normal file
106
modules/elprep/filter/meta.yml
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# Module metadata for ELPREP_FILTER (modules/elprep/filter/main.nf).
# Input entries are listed in the same order, and under the same names,
# as the process `input:` declarations.
name: "elprep_filter"
description: "Filter, sort and markdup sam/bam files, with optional BQSR and variant calling."
keywords:
  - sort
  - bam
  - sam
  - filter
  - variant calling
tools:
  - "elprep":
      description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
      homepage: "https://github.com/ExaScience/elprep"
      documentation: "https://github.com/ExaScience/elprep"
      tool_dev_url: "https://github.com/ExaScience/elprep"
      doi: "10.1371/journal.pone.0244471"
      licence: "['AGPL v3']"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: Input SAM/BAM file
      pattern: "*.{bam,sam}"
  - run_haplotypecaller:
      type: boolean
      description: Run variant calling on the input files. Needed to generate gvcf output.
  - run_bqsr:
      type: boolean
      description: Run BQSR on the input files. Needed to generate recall metrics.
  - reference_sequences:
      type: file
      description: Optional SAM header to replace existing header.
      pattern: "*.sam"
  - filter_regions_bed:
      type: file
      description: Optional BED file containing regions to filter.
      pattern: "*.bed"
  - reference_elfasta:
      type: file
      description: Elfasta file, required for BQSR and variant calling.
      pattern: "*.elfasta"
  # Named to match the `known_sites_elsites` input of the process.
  - known_sites_elsites:
      type: file
      description: Optional elsites file containing known SNPs for BQSR.
      pattern: "*.elsites"
  - target_regions_bed:
      type: file
      description: Optional BED file containing target regions for BQSR and variant calling.
      pattern: "*.bed"
  - intermediate_bqsr_tables:
      type: file
      description: Optional list of BQSR tables, used when parsing files created by `elprep split`
      pattern: "*.table"
  - bqsr_tables_only:
      type: boolean
      description: Write intermediate BQSR tables, used when parsing files created by `elprep split`.
  - get_activity_profile:
      type: boolean
      description: Get the activity profile calculated by the haplotypecaller to the given file in IGV format.
  - get_assembly_regions:
      type: boolean
      description: Get the assembly regions calculated by haplotypecaller to the specified file in IGV format.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - bam:
      type: file
      description: Sorted, markdup, optionally BQSR BAM/SAM file
      pattern: "*.{bam,sam}"
  - metrics:
      type: file
      description: Optional duplicate metrics file generated by elprep
      pattern: "*.{metrics.txt}"
  - recall:
      type: file
      description: Optional recall metrics file generated by elprep
      pattern: "*.{recall}"
  - gvcf:
      type: file
      description: Optional GVCF output file
      pattern: "*.{vcf.gz}"
  - table:
      type: file
      description: Optional intermediate BQSR table output file
      pattern: "*.{table}"
  - activity_profile:
      type: file
      description: Optional activity profile output file
      pattern: "*.{activity_profile.igv}"
  - assembly_regions:
      type: file
      description: Optional assembly regions output file
      pattern: "*.{assembly_regions.igv}"
authors:
  - "@matthdsm"
|
|
@ -599,6 +599,10 @@ ectyper:
|
||||||
- modules/ectyper/**
|
- modules/ectyper/**
|
||||||
- tests/modules/ectyper/**
|
- tests/modules/ectyper/**
|
||||||
|
|
||||||
|
elprep/filter:
|
||||||
|
- modules/elprep/filter/**
|
||||||
|
- tests/modules/elprep/filter/**
|
||||||
|
|
||||||
elprep/split:
|
elprep/split:
|
||||||
- modules/elprep/split/**
|
- modules/elprep/split/**
|
||||||
- tests/modules/elprep/split/**
|
- tests/modules/elprep/split/**
|
||||||
|
|
|
@ -112,6 +112,7 @@ params {
|
||||||
}
|
}
|
||||||
'homo_sapiens' {
|
'homo_sapiens' {
|
||||||
'genome' {
|
'genome' {
|
||||||
|
genome_elfasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta"
|
||||||
genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta"
|
genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta"
|
||||||
genome_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai"
|
genome_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai"
|
||||||
genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict"
|
genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict"
|
||||||
|
@ -123,6 +124,7 @@ params {
|
||||||
genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header"
|
genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header"
|
||||||
genome_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz"
|
genome_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz"
|
||||||
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
|
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
|
||||||
|
genome_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elsites"
|
||||||
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
||||||
genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta"
|
genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta"
|
||||||
genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz"
|
genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz"
|
||||||
|
@ -136,6 +138,7 @@ params {
|
||||||
genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
|
genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
|
||||||
genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
|
genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
|
||||||
|
|
||||||
|
dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites"
|
||||||
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
|
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
|
||||||
dbsnp_146_hg38_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi"
|
dbsnp_146_hg38_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi"
|
||||||
gnomad_r2_1_1_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
|
gnomad_r2_1_1_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
|
||||||
|
|
18
tests/modules/elprep/filter/main.nf
Normal file
18
tests/modules/elprep/filter/main.nf
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { ELPREP_FILTER } from '../../../../modules/elprep/filter/main.nf'
|
||||||
|
|
||||||
|
// Exercises ELPREP_FILTER with variant calling, BQSR and both IGV outputs
// switched on; the optional header, filter-region and intermediate-table
// inputs are left empty.
workflow test_elprep_filter {

    def human = params.test_data['homo_sapiens']

    // [ meta map, input BAM ]
    def bam_input = [
        [ id:'test', single_end:false ],
        file(human['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    ]

    def elfasta = file(human['genome']['genome_elfasta'], checkIfExists: true)
    def elsites = file(human['genome']['dbsnp_146_hg38_elsites'], checkIfExists: true)
    def targets = file(human['genome']['genome_bed'], checkIfExists: true)

    ELPREP_FILTER (
        bam_input,
        true,       // run_haplotypecaller
        true,       // run_bqsr
        [],         // reference_sequences
        [],         // filter_regions_bed
        elfasta,    // reference_elfasta
        elsites,    // known_sites_elsites
        targets,    // target_regions_bed
        [],         // intermediate_bqsr_tables
        [],         // bqsr_tables_only
        true,       // get_activity_profile
        true        // get_assembly_regions
    )
}
|
7
tests/modules/elprep/filter/nextflow.config
Normal file
7
tests/modules/elprep/filter/nextflow.config
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
// Test configuration for the elprep/filter module.
process {

    // Publish into a directory named after the lower-cased first "_"-separated
    // token of the unqualified process name.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Duplicate marking is requested through ext.args; the module looks for
    // this flag to decide whether to request a metrics file.
    withName: 'ELPREP_FILTER' {
        ext.args = "--mark-duplicates "
    }

}
|
13
tests/modules/elprep/filter/test.yml
Normal file
13
tests/modules/elprep/filter/test.yml
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# Test case for the elprep/filter module: runs the test workflow and checks
# that every expected output file is published.
- name: elprep filter test_elprep_filter
  command: nextflow run tests/modules/elprep/filter -entry test_elprep_filter -c tests/config/nextflow.config
  tags:
    - elprep
    - elprep/filter
  files:
    # Only existence is checked; no checksum or content assertions are set.
    - path: output/elprep/test.activity_profile.igv
    - path: output/elprep/test.assembly_regions.igv
    - path: output/elprep/test.bam
    - path: output/elprep/test.g.vcf.gz
    - path: output/elprep/test.metrics.txt
    - path: output/elprep/test.recall
    - path: output/elprep/versions.yml
|
Loading…
Reference in a new issue