Merge remote-tracking branch 'origin/master' into gatk_filter

This commit is contained in:
Rike 2022-05-18 15:52:47 +02:00
commit bb3e64647b
11 changed files with 316 additions and 0 deletions

View file

@ -0,0 +1,57 @@
// Runs `gatk CNNScoreVariants` on a VCF and writes `<prefix>.vcf.gz`.
//
// Inputs:
//   meta          - sample map; `meta.id` is used as the tag and default output prefix
//   vcf           - variants passed to `--variant`
//   aligned_input - optional reads passed to `--input` (omitted when empty)
//   intervals     - optional intervals passed to `--intervals` (omitted when empty)
//   fasta         - reference passed to `--reference`
//   fai, dict     - staged next to the reference; not referenced on the command line
//   architecture  - optional file passed to `--architecture` (omitted when empty)
//   weights       - optional file passed to `--weights` (omitted when empty)
//
// Outputs:
//   vcf      - tuple of meta and the `*.vcf.gz` produced by the tool
//   versions - `versions.yml` with the GATK version parsed from `gatk --version`
process GATK4_CNNSCOREVARIANTS {
    tag "$meta.id"
    label 'process_low'

    //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used for GATK4/CNNScoreVariants at the moment. Please use docker or singularity containers."
    }
    container 'broadinstitute/gatk:4.2.6.1' //Biocontainers is missing a package

    input:
    tuple val(meta), path(vcf), path(aligned_input), path(intervals)
    path fasta
    path fai
    path dict
    path architecture
    path weights

    output:
    tuple val(meta), path("*.vcf.gz"), emit: vcf
    path "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Optional CLI fragments. The locals carry a `_cmd`/`_command` suffix so they
    // do not redeclare the process inputs they are built from.
    def aligned_input_cmd = aligned_input ? "--input $aligned_input" : ""
    def interval_command  = intervals     ? "--intervals $intervals" : ""
    def architecture_cmd  = architecture  ? "--architecture $architecture" : ""
    def weights_cmd       = weights       ? "--weights $weights" : ""

    // Java heap size in GB: the task's memory when declared, otherwise 3.
    def avail_mem = 3
    if (!task.memory) {
        log.info '[GATK CnnScoreVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.giga
    }
    """
    gatk --java-options "-Xmx${avail_mem}g" CNNScoreVariants \\
        --variant $vcf \\
        --output ${prefix}.vcf.gz \\
        --reference $fasta \\
        $interval_command \\
        $aligned_input_cmd \\
        $architecture_cmd \\
        $weights_cmd \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,72 @@
# Module metadata for GATK4_CNNSCOREVARIANTS: tool description plus the
# documented input and output channels of the process.
name: "gatk4_cnnscorevariants"
description: Apply a Convolutional Neural Net to filter annotated variants
keywords:
  - gatk4_cnnscorevariants
  - gatk4
  - variants
tools:
  - gatk4:
      description: |
        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
      doi: 10.1158/1538-7445.AM2017-3590
      licence: ["Apache-2.0"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: VCF file
      pattern: "*.vcf.gz"
  - aligned_input:
      type: file
      description: BAM/CRAM file from alignment (optional)
      pattern: "*.{bam,cram}"
  - intervals:
      type: file
      description: Bed file with the genomic regions included in the library (optional)
  - fasta:
      type: file
      description: The reference fasta file
      pattern: "*.fasta"
  - fai:
      type: file
      description: Index of reference fasta file
      pattern: "*.fasta.fai"
  - dict:
      type: file
      description: GATK sequence dictionary
      pattern: "*.dict"
  - architecture:
      type: file
      description: Neural Net architecture configuration json file (optional)
      pattern: "*.json"
  - weights:
      type: file
      description: Keras model HD5 file with neural net weights. (optional)
      pattern: "*.hd5"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - vcf:
      type: file
      description: Annotated VCF file
      # Matches the "*.vcf.gz" glob declared by the process output.
      pattern: "*.vcf.gz"

authors:
  - "@FriederikeHanssen"

View file

@ -0,0 +1,54 @@
// Runs `motus profile` on reads (or an intermediate bam/mgc file) and writes
// `<prefix>.out`; `*.bam` and `*.mgc` files are collected when present.
//
// Inputs:
//   meta  - sample map; `meta.id` is the tag/default prefix, `meta.single_end`
//           selects between the `-s` and `-f/-r` read options
//   reads - input file(s); the extension of the first one selects the input flag
//   db    - optional database directory passed via `-db`
process MOTUS_PROFILE {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
        'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(reads)
    path db

    output:
    tuple val(meta), path("*.out"), emit: out
    tuple val(meta), path("*.bam"), optional: true, emit: bam
    tuple val(meta), path("*.mgc"), optional: true, emit: mgc
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Choose the input flag: bam -> -i, mgc -> -m, otherwise reads as
    // single-end (-s) or as a forward/reverse pair (-f/-r).
    def input_opts
    if (reads[0].getExtension() == 'bam') {
        input_opts = "-i ${reads}"
    } else if (reads[0].getExtension() == 'mgc') {
        input_opts = "-m $reads"
    } else if (meta.single_end) {
        input_opts = "-s $reads"
    } else {
        input_opts = "-f ${reads[0]} -r ${reads[1]}"
    }

    // Only pass -db when a database was supplied.
    def db_opt = ""
    if (db) {
        db_opt = "-db ${db}"
    }
    """
    motus profile \\
        $args \\
        $input_opts \\
        $db_opt \\
        -t $task.cpus \\
        -n $prefix \\
        -o ${prefix}.out

    ## mOTUs version number is not available from command line.
    ## mOTUs save the version number in index database folder.
    ## mOTUs will check the database version is same version as exec version.
    if [ "$db" == "" ]; then
        VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//')
    else
        VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g')
    fi
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mOTUs: \$VERSION
    END_VERSIONS
    """
}

View file

@ -0,0 +1,61 @@
# Module metadata for MOTUS_PROFILE: tool description plus the documented
# input and output channels of the process.
name: "motus_profile"
description: Taxonomic meta-omics profiling using universal marker genes
keywords:
  - classify
  - metagenomics
  - fastq
  - taxonomic profiling
tools:
  - "motus":
      description: "Marker gene-based OTU (mOTU) profiling"
      homepage: "https://motu-tool.org/"
      documentation: "https://github.com/motu-tool/mOTUs/wiki"
      tool_dev_url: "https://github.com/motu-tool/mOTUs"
      doi: "10.1038/s41467-019-08844-4"
      # A real list, not a quoted string that merely looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
        respectively.
        Or the intermediate bam file mapped by bwa to the mOTUs database or
        the output bam file from motus profile.
        Or the intermediate mgc read counts table.
      # Alternatives inside the braces carry no leading dot; "*." already supplies it.
      pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,bam,mgc}"
  - db:
      type: directory
      description: |
        mOTUs database downloaded by `motus downloadDB`

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - out:
      type: file
      description: Results with taxonomic classification of each read
      pattern: "*.out"
  - bam:
      type: file
      description: Optional intermediate sorted BAM file from BWA
      pattern: "*.{bam}"
  - mgc:
      type: file
      description: Optional intermediate mgc read count table file saved with `-M`.
      pattern: "*.{mgc}"

authors:
  - "@jianhong"

View file

@ -731,6 +731,10 @@ gatk4/calculatecontamination:
- modules/gatk4/calculatecontamination/**
- tests/modules/gatk4/calculatecontamination/**
# Path globs for the gatk4/cnnscorevariants entry: the module sources and its tests.
# NOTE(review): presumably used to select which module tests run when these
# paths change - confirm against the CI configuration that reads this file.
gatk4/cnnscorevariants:
- modules/gatk4/cnnscorevariants/**
- tests/modules/gatk4/cnnscorevariants/**
gatk4/combinegvcfs:
- modules/gatk4/combinegvcfs/**
- tests/modules/gatk4/combinegvcfs/**
@ -1314,6 +1318,10 @@ motus/downloaddb:
- modules/motus/downloaddb/**
- tests/modules/motus/downloaddb/**
# Path globs for the motus/profile entry: the module sources and its tests.
# NOTE(review): presumably used to select which module tests run when these
# paths change - confirm against the CI configuration that reads this file.
motus/profile:
- modules/motus/profile/**
- tests/modules/motus/profile/**
msisensor/msi:
- modules/msisensor/msi/**
- tests/modules/msisensor/msi/**

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_CNNSCOREVARIANTS } from '../../../../modules/gatk4/cnnscorevariants/main.nf'
// Test workflow: scores the homo_sapiens test VCF with no aligned reads, no
// intervals and no custom architecture/weights files.
workflow test_gatk4_cnnscorevariants {

    vcf = file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true)

    // Tuple layout: [ meta, vcf, aligned_input, intervals ]
    input = [ [ id:'test' ], vcf, [], [] ]

    // Reference genome and its companion files.
    genome = params.test_data['homo_sapiens']['genome']
    fasta  = file(genome['genome_fasta'], checkIfExists: true)
    fai    = file(genome['genome_fasta_fai'], checkIfExists: true)
    dict   = file(genome['genome_dict'], checkIfExists: true)

    // The trailing empty lists fill the architecture and weights inputs.
    GATK4_CNNSCOREVARIANTS ( input, fasta, fai, dict, [], [] )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish into <outdir>/<tool>, where <tool> is the lower-cased text before
    // the first '_' of the last ':'-separated component of the process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,9 @@
# Runs the test_gatk4_cnnscorevariants entry workflow and checks that the
# published VCF contains the expected header text.
- name: gatk4 cnnscorevariants test_gatk4_cnnscorevariants
  # Folded scalar: the lines below are joined with single spaces.
  command: >-
    nextflow run ./tests/modules/gatk4/cnnscorevariants
    -entry test_gatk4_cnnscorevariants
    -c ./tests/config/nextflow.config
    -c ./tests/modules/gatk4/cnnscorevariants/nextflow.config
  tags:
    - gatk4
    - gatk4/cnnscorevariants
  files:
    - path: output/gatk4/test.vcf.gz
      contains:
        - "##ALT=<ID=NON_REF,Description="

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { MOTUS_DOWNLOADDB } from '../../../../modules/motus/downloaddb/main.nf'
include { MOTUS_PROFILE } from '../../../../modules/motus/profile/main.nf'
// Test workflow: builds the database with MOTUS_DOWNLOADDB, then profiles one
// single-end sarscov2 FASTQ against it.
workflow test_motus_profile_single_end {

    reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // Tuple layout: [ meta, reads ]
    input = [ [ id:'test', single_end:true ], reads ]

    // Script handed to the database download module.
    downloaddb_script = file('https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py')

    MOTUS_DOWNLOADDB ( downloaddb_script )
    MOTUS_PROFILE ( input, MOTUS_DOWNLOADDB.out.db )
}

View file

@ -0,0 +1,5 @@
process {
    // Output directory is <outdir>/<tool>: take the last ':'-separated part of
    // the process name, keep what precedes its first '_', and lower-case it.
    publishDir = {
        def unqualified = task.process.tokenize(':')[-1]
        def tool = unqualified.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool}"
    }
}

View file

@ -0,0 +1,8 @@
# Runs the test_motus_profile_single_end entry workflow and checks the header
# line of the published profile.
- name: motus profile test_motus_profile_single_end
  command: nextflow run tests/modules/motus/profile -entry test_motus_profile_single_end -c tests/config/nextflow.config
  tags:
    - motus
    - motus/profile
  files:
    - path: output/motus/test.out
      contains:
        # Double quotes are required so that \t is read as a tab character.
        - "#consensus_taxonomy\ttest"