---
# Module metadata for "multivcfanalyzer": search keywords, the wrapped tool,
# every input and output channel, and the module authors.
name: "multivcfanalyzer"
description: SNP table generator from GATK UnifiedGenotyper with functionality geared for aDNA
keywords:
  - vcf
  - ancient DNA
  - aDNA
  - SNP
  - GATK UnifiedGenotyper
  - SNP table

tools:
  - "multivcfanalyzer":
      description: "MultiVCFAnalyzer is a VCF file post-processing tool tailored for aDNA. License on Github repository."
      homepage: "https://github.com/alexherbig/MultiVCFAnalyzer"
      documentation: "https://github.com/alexherbig/MultiVCFAnalyzer"
      tool_dev_url: "https://github.com/alexherbig/MultiVCFAnalyzer"
      doi: "10.1038/nature13591"
      # A real YAML sequence, not a quoted string containing a list literal.
      licence: ["GPL >=3"]

input:
  - vcfs:
      type: file
      description: One or more uncompressed VCF files
      pattern: "*.vcf"
  - fasta:
      type: file
      description: Reference genome VCF was generated against
      pattern: "*.{fasta,fna,fa}"
  - snpeff_results:
      type: file
      description: Results from snpEff in txt format (Optional)
      pattern: "*.txt"
  - gff:
      type: file
      description: GFF file corresponding to reference genome fasta (Optional)
      pattern: "*.gff"
  - allele_freqs:
      type: boolean
      description: |
        Whether to include in the SNP table the percentage of reads in which
        a given allele is present.
  - genotype_quality:
      type: integer
      description: |
        Minimum GATK genotyping threshold; a SNP call falling under it
        is 'discarded'
  - coverage:
      type: integer
      description: |
        Minimum number of reads that a position must be covered by to be
        reported
  - homozygous_freq:
      type: number
      description: Fraction of reads a base must have to be called 'homozygous'
  - heterozygous_freq:
      type: number
      description: |
        Fraction of reads above which, while still lower than the homozygous
        threshold, a base will be called 'heterozygous'.
  - gff_exclude:
      type: file
      description: |
        File listing positions that will be 'filtered' (i.e. ignored)
        (Optional)
      # NOTE(review): pattern previously read "*.vcf", which contradicts the
      # entry name and the sibling `gff` input - confirm the expected format.
      pattern: "*.gff"

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - full_alignment:
      type: file
      description: A fasta file of all positions contained in the VCF files i.e. including ref calls
      pattern: "*.fasta.gz"
  - info_txt:
      type: file
      description: Information about the run
      pattern: "*.txt"
  - snp_alignment:
      type: file
      description: A fasta file of just SNP positions with samples only
      pattern: "*.fasta.gz"
  - snp_genome_alignment:
      type: file
      description: A fasta file of just SNP positions with reference genome
      pattern: "*.fasta.gz"
  - snpstatistics:
      type: file
      description: Some basic statistics about the SNP calls of each sample
      pattern: "*.tsv"
  - snptable:
      type: file
      description: Basic SNP table of combined positions taken from each VCF file
      pattern: "*.tsv"
  - snptable_snpeff:
      type: file
      description: Input file for SnpEff
      pattern: "*.tsv"
  - snptable_uncertainty:
      type: file
      description: Same as the basic SNP table, but with lower case characters indicating uncertain calls
      pattern: "*.tsv"
  - structure_genotypes:
      type: file
      description: Input file for STRUCTURE
      pattern: "*.tsv"
  - structure_genotypes_nomissing:
      type: file
      description: Alternate input file for STRUCTURE
      pattern: "*.tsv"
  - json:
      type: file
      description: Summary statistics in MultiQC JSON format
      pattern: "*.json"

authors:
  - "@jfy133"