mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Merge branch 'master' into tool/elprep-merge
This commit is contained in:
commit
4af5be8de3
7 changed files with 240 additions and 0 deletions
89
modules/elprep/filter/main.nf
Normal file
89
modules/elprep/filter/main.nf
Normal file
|
@ -0,0 +1,89 @@
|
|||
process ELPREP_FILTER {
|
||||
tag "$meta.id"
|
||||
label 'process_high'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
|
||||
'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(bam)
|
||||
val(run_haplotypecaller)
|
||||
val(run_bqsr)
|
||||
path(reference_sequences)
|
||||
path(filter_regions_bed)
|
||||
path(reference_elfasta)
|
||||
path(known_sites_elsites)
|
||||
path(target_regions_bed)
|
||||
path(intermediate_bqsr_tables)
|
||||
val(bqsr_tables_only)
|
||||
val(get_activity_profile)
|
||||
val(get_assembly_regions)
|
||||
|
||||
|
||||
output:
|
||||
tuple val(meta), path("**.{bam,sam}") ,emit: bam
|
||||
tuple val(meta), path("*.metrics.txt") ,optional: true, emit: metrics
|
||||
tuple val(meta), path("*.recall") ,optional: true, emit: recall
|
||||
tuple val(meta), path("*.vcf.gz") ,optional: true, emit: gvcf
|
||||
tuple val(meta), path("*.table") ,optional: true, emit: table
|
||||
tuple val(meta), path("*.activity_profile.igv") ,optional: true, emit: activity_profile
|
||||
tuple val(meta), path("*.assembly_regions.igv") ,optional: true, emit: assembly_regions
|
||||
path "versions.yml" ,emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def suffix = args.contains("--output-type sam") ? "sam" : "bam"
|
||||
|
||||
// filter args
|
||||
def reference_sequences_cmd = reference_sequences ? " --replace-reference-sequences ${reference_sequences}" : ""
|
||||
def filter_regions_cmd = filter_regions_bed ? " --filter-non-overlapping-reads ${filter_regions_bed}" : ""
|
||||
|
||||
// markdup args
|
||||
def markdup_cmd = args.contains("--mark-duplicates") ? " --mark-optical-duplicates ${prefix}.metrics.txt": ""
|
||||
|
||||
// variant calling args
|
||||
def haplotyper_cmd = run_haplotypecaller ? " --haplotypecaller ${prefix}.g.vcf.gz": ""
|
||||
|
||||
def fasta_cmd = reference_elfasta ? " --reference ${reference_elfasta}": ""
|
||||
def known_sites_cmd = known_sites_elsites ? " --known-sites ${known_sites_elsites}": ""
|
||||
def target_regions_cmd = target_regions_bed ? " --target-regions ${target_regions_bed}": ""
|
||||
|
||||
// bqsr args
|
||||
def bqsr_cmd = run_bqsr ? " --bqsr ${prefix}.recall": ""
|
||||
def bqsr_tables_only_cmd = bqsr_tables_only ? " --bqsr-tables-only ${prefix}.table": ""
|
||||
|
||||
def intermediate_bqsr_cmd = intermediate_bqsr_tables ? " --bqsr-apply .": ""
|
||||
|
||||
// misc
|
||||
def activity_profile_cmd = get_activity_profile ? " --activity-profile ${prefix}.activity_profile.igv": ""
|
||||
def assembly_regions_cmd = get_assembly_regions ? " --assembly-regions ${prefix}.assembly_regions.igv": ""
|
||||
|
||||
"""
|
||||
elprep filter ${bam} ${prefix}.${suffix} \\
|
||||
${reference_sequences_cmd} \\
|
||||
${filter_regions_cmd} \\
|
||||
${markdup_cmd} \\
|
||||
${haplotyper_cmd} \\
|
||||
${fasta_cmd} \\
|
||||
${known_sites_cmd} \\
|
||||
${target_regions_cmd} \\
|
||||
${bqsr_cmd} \\
|
||||
${bqsr_tables_only_cmd} \\
|
||||
${intermediate_bqsr_cmd} \\
|
||||
${activity_profile_cmd} \\
|
||||
${assembly_regions_cmd} \\
|
||||
--nr-of-threads ${task.cpus} \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
106
modules/elprep/filter/meta.yml
Normal file
106
modules/elprep/filter/meta.yml
Normal file
|
@ -0,0 +1,106 @@
|
|||
name: "elprep_filter"
|
||||
description: "Filter, sort and markdup sam/bam files, with optional BQSR and variant calling."
|
||||
keywords:
|
||||
- sort
|
||||
- bam
|
||||
- sam
|
||||
- filter
|
||||
- variant calling
|
||||
tools:
|
||||
- "elprep":
|
||||
description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
|
||||
homepage: "https://github.com/ExaScience/elprep"
|
||||
documentation: "https://github.com/ExaScience/elprep"
|
||||
tool_dev_url: "https://github.com/ExaScience/elprep"
|
||||
doi: "10.1371/journal.pone.0244471"
|
||||
licence: "['AGPL v3']"
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: Input SAM/BAM file
|
||||
pattern: "*.{bam,sam}"
|
||||
- run_haplotypecaller:
|
||||
type: boolean
|
||||
description: Run variant calling on the input files. Needed to generate gvcf output.
|
||||
- run_bqsr:
|
||||
type: boolean
|
||||
description: Run BQSR on the input files. Needed to generate recall metrics.
|
||||
- reference_sequences:
|
||||
type: file
|
||||
description: Optional SAM header to replace existing header.
|
||||
pattern: "*.sam"
|
||||
- filter_regions_bed:
|
||||
type: file
|
||||
description: Optional BED file containing regions to filter.
|
||||
pattern: "*.bed"
|
||||
- reference_elfasta:
|
||||
type: file
|
||||
description: Elfasta file, required for BQSR and variant calling.
|
||||
pattern: "*.elfasta"
|
||||
- known_sites_elsites:
|
||||
type: file
|
||||
description: Optional elsites file containing known SNPs for BQSR.
|
||||
pattern: "*.elsites"
|
||||
- target_regions_bed:
|
||||
type: file
|
||||
description: Optional BED file containing target regions for BQSR and variant calling.
|
||||
pattern: "*.bed"
|
||||
- intermediate_bqsr_tables:
|
||||
type: file
|
||||
description: Optional list of BQSR tables, used when parsing files created by `elprep split`
|
||||
pattern: "*.table"
|
||||
- bqsr_tables_only:
|
||||
type: boolean
|
||||
description: Write intermediate BQSR tables, used when parsing files created by `elprep split`.
|
||||
- get_activity_profile:
|
||||
type: boolean
|
||||
description: Write the activity profile calculated by the haplotypecaller to a file in IGV format.
|
||||
- get_assembly_regions:
|
||||
type: boolean
|
||||
description: Write the assembly regions calculated by the haplotypecaller to a file in IGV format.
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- bam:
|
||||
type: file
|
||||
description: Sorted, duplicate-marked and optionally BQSR-recalibrated BAM/SAM file
|
||||
pattern: "*.{bam,sam}"
|
||||
- metrics:
|
||||
type: file
|
||||
description: Optional duplicate metrics file generated by elprep
|
||||
pattern: "*.{metrics.txt}"
|
||||
- recall:
|
||||
type: file
|
||||
description: Optional recall metrics file generated by elprep
|
||||
pattern: "*.{recall}"
|
||||
- gvcf:
|
||||
type: file
|
||||
description: Optional GVCF output file
|
||||
pattern: "*.{vcf.gz}"
|
||||
- table:
|
||||
type: file
|
||||
description: Optional intermediate BQSR table output file
|
||||
pattern: "*.{table}"
|
||||
- activity_profile:
|
||||
type: file
|
||||
description: Optional activity profile output file
|
||||
pattern: "*.{activity_profile.igv}"
|
||||
- assembly_regions:
|
||||
type: file
|
||||
description: Optional assembly regions output file
|
||||
pattern: "*.{assembly_regions.igv}"
|
||||
authors:
|
||||
- "@matthdsm"
|
|
@ -599,6 +599,10 @@ ectyper:
|
|||
- modules/ectyper/**
|
||||
- tests/modules/ectyper/**
|
||||
|
||||
elprep/filter:
|
||||
- modules/elprep/filter/**
|
||||
- tests/modules/elprep/filter/**
|
||||
|
||||
elprep/merge:
|
||||
- modules/elprep/merge/**
|
||||
- tests/modules/elprep/merge/**
|
||||
|
|
|
@ -112,6 +112,7 @@ params {
|
|||
}
|
||||
'homo_sapiens' {
|
||||
'genome' {
|
||||
genome_elfasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta"
|
||||
genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta"
|
||||
genome_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai"
|
||||
genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict"
|
||||
|
@ -123,6 +124,7 @@ params {
|
|||
genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header"
|
||||
genome_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz"
|
||||
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
|
||||
genome_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elsites"
|
||||
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
||||
genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta"
|
||||
genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz"
|
||||
|
@ -136,6 +138,7 @@ params {
|
|||
genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
|
||||
genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
|
||||
|
||||
dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites"
|
||||
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
|
||||
dbsnp_146_hg38_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi"
|
||||
gnomad_r2_1_1_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
|
||||
|
|
18
tests/modules/elprep/filter/main.nf
Normal file
18
tests/modules/elprep/filter/main.nf
Normal file
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { ELPREP_FILTER } from '../../../../modules/elprep/filter/main.nf'
|
||||
|
||||
workflow test_elprep_filter {
|
||||
|
||||
input = [
|
||||
[ id:'test', single_end:false ], // meta map
|
||||
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||
]
|
||||
reference_elfasta = file(params.test_data['homo_sapiens']['genome']['genome_elfasta'], checkIfExists: true)
|
||||
known_sites_elsites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_elsites'], checkIfExists: true)
|
||||
target_regions_bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
|
||||
|
||||
ELPREP_FILTER ( input, true, true, [], [], reference_elfasta, known_sites_elsites, target_regions_bed, [], [], true, true)
|
||||
}
|
7
tests/modules/elprep/filter/nextflow.config
Normal file
7
tests/modules/elprep/filter/nextflow.config
Normal file
|
@ -0,0 +1,7 @@
|
|||
process {
|
||||
|
||||
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
||||
withName: ELPREP_FILTER {
|
||||
ext.args = "--mark-duplicates "
|
||||
}
|
||||
}
|
13
tests/modules/elprep/filter/test.yml
Normal file
13
tests/modules/elprep/filter/test.yml
Normal file
|
@ -0,0 +1,13 @@
|
|||
- name: elprep filter test_elprep_filter
|
||||
command: nextflow run tests/modules/elprep/filter -entry test_elprep_filter -c tests/config/nextflow.config
|
||||
tags:
|
||||
- elprep
|
||||
- elprep/filter
|
||||
files:
|
||||
- path: output/elprep/test.activity_profile.igv
|
||||
- path: output/elprep/test.assembly_regions.igv
|
||||
- path: output/elprep/test.bam
|
||||
- path: output/elprep/test.g.vcf.gz
|
||||
- path: output/elprep/test.metrics.txt
|
||||
- path: output/elprep/test.recall
|
||||
- path: output/elprep/versions.yml
|
Loading…
Reference in a new issue