nf-core_modules/modules/multivcfanalyzer/meta.yml
James A. Fellows Yates aed45dd766
Add MultiVCFAnalyzer (#1845)
* Add MultiVCFAnalyzer

* Fix versions

* Fix tests due to md5sum var

* Apply suggestions from code review

* Linting

* Apply suggestions from code review

Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>

Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>
2022-07-05 09:47:10 +02:00

122 lines
3.7 KiB
YAML

# Module identity: machine-readable name, one-line summary, and search keywords.
name: "multivcfanalyzer"
description: SNP table generator from GATK UnifiedGenotyper with functionality geared for aDNA
# Free-text tags; entries may contain spaces.
keywords:
  - vcf
  - ancient DNA
  - aDNA
  - SNP
  - GATK UnifiedGenotyper
  - SNP table
# Software described by this file. Each list item is a single-key mapping
# (tool name -> metadata), so the metadata keys must be nested under the tool
# name rather than sitting at the top level of the document.
tools:
  - "multivcfanalyzer":
      description: "MultiVCFAnalyzer is a VCF file post-processing tool tailored for aDNA. License on Github repository."
      homepage: "https://github.com/alexherbig/MultiVCFAnalyzer"
      documentation: "https://github.com/alexherbig/MultiVCFAnalyzer"
      tool_dev_url: "https://github.com/alexherbig/MultiVCFAnalyzer"
      doi: "10.1038/nature13591"
      # A real YAML list of licence identifiers, not a string that merely
      # looks like a list (previously "['GPL >=3']").
      licence: ["GPL >=3"]
# Inputs, one single-key mapping per input. `type` is the kind of value,
# `description` is free text, and `pattern` (file inputs only) is a glob for
# the expected file name. Entries marked "(Optional)" may be omitted.
input:
  - vcfs:
      type: file
      description: One or a list of uncompressed VCF files
      pattern: "*.vcf"
  - fasta:
      type: file
      description: Reference genome the VCF was generated against
      pattern: "*.{fasta,fna,fa}"
  - snpeff_results:
      type: file
      description: Results from snpEff in txt format (Optional)
      pattern: "*.txt"
  - gff:
      type: file
      description: GFF file corresponding to reference genome fasta (Optional)
      pattern: "*.gff"
  - allele_freqs:
      type: boolean
      description: |
        Whether to include in the SNP table the percentage of reads in which
        a given allele is present.
  - genotype_quality:
      type: integer
      description: |
        Minimum GATK genotyping quality threshold; a SNP call falling below
        it is 'discarded'
  - coverage:
      type: integer
      description: |
        Minimum number of reads that a position must be covered by to be
        reported
  - homozygous_freq:
      type: number
      description: Fraction of reads a base must have to be called 'homozygous'
  - heterozygous_freq:
      # Was misspelled "mumber"; uses the same type as homozygous_freq.
      type: number
      description: |
        Fraction of reads a base must exceed, while staying below the
        homozygous threshold, to be called 'heterozygous'.
  - gff_exclude:
      type: file
      description: |
        file listing positions that will be 'filtered' (i.e. ignored)
        (Optional)
      # NOTE(review): previously "*.vcf", which looks copy-pasted from `vcfs`;
      # the input name suggests a GFF file - confirm against the module script.
      pattern: "*.gff"
# Outputs, one single-key mapping per output. `pattern` is a glob for the
# expected file name, so suffix-only patterns carry a leading `*` in the same
# way as the input patterns (e.g. "*.vcf").
output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # NOTE(review): nothing else in this file refers to BAM/CRAM/SAM data, so
  # this entry looks like a template leftover - confirm against the module's
  # declared outputs and remove it if unused.
  - bam:
      type: file
      description: Sorted BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - full_alignment:
      type: file
      description: A fasta file of all positions contained in the VCF files i.e. including ref calls
      pattern: "*.fasta.gz"
  - info_txt:
      type: file
      description: Information about the run
      pattern: "*.txt"
  - snp_alignment:
      type: file
      description: A fasta file of just SNP positions with samples only
      pattern: "*.fasta.gz"
  - snp_genome_alignment:
      type: file
      description: A fasta file of just SNP positions with reference genome
      pattern: "*.fasta.gz"
  - snpstatistics:
      type: file
      description: Some basic statistics about the SNP calls of each sample
      pattern: "*.tsv"
  - snptable:
      type: file
      description: Basic SNP table of combined positions taken from each VCF file
      pattern: "*.tsv"
  - snptable_snpeff:
      type: file
      description: Input file for SnpEff
      pattern: "*.tsv"
  - snptable_uncertainty:
      type: file
      # NOTE(review): "above" is ambiguous in this list; presumably it refers
      # to the SNP table rather than the SnpEff input - confirm and reword.
      description: Same as above, but with lower case characters indicating uncertain calls
      pattern: "*.tsv"
  - structure_genotypes:
      type: file
      description: Input file for STRUCTURE
      pattern: "*.tsv"
  - structure_genotypes_nomissing:
      type: file
      description: Alternate input file for STRUCTURE
      pattern: "*.tsv"
  - json:
      type: file
      description: Summary statistics in MultiQC JSON format
      pattern: "*.json"
# Author handles. Each must stay quoted because a plain YAML scalar cannot
# begin with "@".
authors:
  - "@jfy133"