nf-core_modules/modules/vcftools/meta.yml

295 lines
10 KiB
YAML
Raw Normal View History

# Module identity: name, one-line summary and search keywords.
name: vcftools
description: A set of tools written in Perl and C++ for working with VCF files
# Keywords form a sequence; every entry is its own list item.
keywords:
- VCF
- sort
# Software wrapped by this module, with upstream links and licence.
tools:
- vcftools:
    description: >-
      A set of tools written in Perl and C++ for working with VCF files.
      This package only contains the C++ libraries whereas the package
      perl-vcftools-vcf contains the perl libraries
    homepage: http://vcftools.sourceforge.net/
    documentation: http://vcftools.sourceforge.net/man_latest.html
    # Placeholder string, quoted so it is never mistaken for a null value.
    tool_dev_url: "None"
    # No DOI recorded; explicit null instead of a bare empty value.
    doi: null
    licence: ["LGPL"]
# Inputs: sample metadata, the primary variant file, and two optional
# companion files.
input:
- meta:
    type: map
    description: |
      Groovy Map containing sample information
      e.g. [ id:'test', single_end:false ]
- variant_file:
    type: file
    description: variant input file which can be vcf, vcf.gz, or bcf format.
- bed:
    type: file
    description: bed file which can be used with different arguments in vcftools (optional)
- diff_variant_file:
    type: file
    description: secondary variant file which can be used with the 'diff' suite of tools (optional)
# Outputs. Apart from meta and versions, every entry below is marked
# optional in its description.
output:
- meta:
    type: map
    description: |
      Groovy Map containing sample information
      e.g. [ id:'test', single_end:false ]
- versions:
    type: file
    description: File containing software versions
    pattern: "versions.yml"
# Re-written variant files and per-site allele frequency/count tables.
- vcf:
    type: file
    description: vcf file (optional)
    pattern: "*.vcf"
- bcf:
    type: file
    description: bcf file (optional)
    pattern: "*.bcf"
- frq:
    type: file
    description: Allele frequency for each site (optional)
    pattern: "*.frq"
- frq_count:
    type: file
    description: Allele counts for each site (optional)
    pattern: "*.frq.count"
# Depth statistics: per individual, per site (sum and mean), per genotype.
- idepth:
    type: file
    description: mean depth per individual (optional)
    pattern: "*.idepth"
- ldepth:
    type: file
    description: depth per site summed across individuals (optional)
    # Suffix is ".ldepth"; the previous "*.ildepth" matched no site-depth file.
    pattern: "*.ldepth"
- ldepth_mean:
    type: file
    description: mean depth per site calculated across individuals (optional)
    pattern: "*.ldepth.mean"
- gdepth:
    type: file
    description: depth for each genotype in vcf file (optional)
    pattern: "*.gdepth"
# Linkage-disequilibrium outputs (haplotype- and genotype-based).
- hap_ld:
    type: file
    description: r2, D, and D' statistics using phased haplotypes (optional)
    pattern: "*.hap.ld"
- geno_ld:
    type: file
    description: >-
      squared correlation coefficient between genotypes encoded as 0, 1 and 2
      to represent the number of non-reference alleles in each individual
      (optional)
    pattern: "*.geno.ld"
- geno_chisq:
    type: file
    description: test for genotype independence via the chi-squared statistic (optional)
    pattern: "*.geno.chisq"
- list_hap_ld:
    type: file
    description: r2 statistics of the sites contained in the provided input file versus all other sites (optional)
    pattern: "*.list.hap.ld"
- list_geno_ld:
    type: file
    description: r2 statistics of the sites contained in the provided input file versus all other sites (optional)
    pattern: "*.list.geno.ld"
- interchrom_hap_ld:
    type: file
    description: r2 statistics for sites (haplotypes) on different chromosomes (optional)
    pattern: "*.interchrom.hap.ld"
- interchrom_geno_ld:
    type: file
    description: r2 statistics for sites (genotypes) on different chromosomes (optional)
    pattern: "*.interchrom.geno.ld"
# Transition / transversion summaries and the per-FILTER summary.
- tstv:
    type: file
    description: Transition / Transversion ratio in bins of size defined in options (optional)
    pattern: "*.TsTv"
- tstv_summary:
    type: file
    description: Summary of all Transitions and Transversions (optional)
    pattern: "*.TsTv.summary"
- tstv_count:
    type: file
    description: Transition / Transversion ratio as a function of alternative allele count (optional)
    pattern: "*.TsTv.count"
- tstv_qual:
    type: file
    description: Transition / Transversion ratio as a function of SNP quality threshold (optional)
    pattern: "*.TsTv.qual"
- filter_summary:
    type: file
    description: Summary of the number of SNPs and Ts/Tv ratio for each FILTER category (optional)
    pattern: "*.FILTER.summary"
# Nucleotide diversity (per site and windowed) and Fst estimates.
- sites_pi:
    type: file
    description: Nucleotide diversity on a per-site basis (optional)
    pattern: "*.sites.pi"
- windowed_pi:
    type: file
    description: Nucleotide diversity in windows, with window size determined by options (optional)
    # Leading dot added so the suffix is written like every other pattern here.
    pattern: "*.windowed.pi"
- weir_fst:
    type: file
    description: Fst estimate from Weir and Cockerham's 1984 paper (optional)
    pattern: "*.weir.fst"
# Per-individual and population-level summary statistics.
- heterozygosity:
    type: file
    description: Heterozygosity on a per-individual basis (optional)
    pattern: "*.het"
- hwe:
    type: file
    description: >-
      Contains the Observed numbers of Homozygotes and Heterozygotes and the
      corresponding Expected numbers under HWE (optional)
    pattern: "*.hwe"
- tajima_d:
    type: file
    description: Tajima's D statistic in bins with size of the specified number in options (optional)
    pattern: "*.Tajima.D"
- freq_burden:
    type: file
    description: Number of variants within each individual of a specific frequency in options (optional)
    pattern: "*.ifreqburden"
- lroh:
    type: file
    description: Long Runs of Homozygosity (optional)
    pattern: "*.LROH"
- relatedness:
    type: file
    description: >-
      Relatedness statistic based on the method of Yang et al, Nature Genetics
      2010 (doi:10.1038/ng.608) (optional)
    pattern: "*.relatedness"
- relatedness2:
    type: file
    description: >-
      Relatedness statistic based on the method of Manichaikul et al.,
      BIOINFORMATICS 2010 (doi:10.1093/bioinformatics/btq559) (optional)
    pattern: "*.relatedness2"
# Site quality, missingness, SNP density and kept/removed site lists.
- lqual:
    type: file
    description: per-site SNP quality (optional)
    pattern: "*.lqual"
- missing_individual:
    type: file
    description: Missingness on a per-individual basis (optional)
    pattern: "*.imiss"
- missing_site:
    type: file
    description: Missingness on a per-site basis (optional)
    pattern: "*.lmiss"
- snp_density:
    type: file
    description: Number and density of SNPs in bins of size defined by option (optional)
    pattern: "*.snpden"
- kept_sites:
    type: file
    description: All sites that have been kept after filtering (optional)
    pattern: "*.kept.sites"
- removed_sites:
    type: file
    description: All sites that have been removed after filtering (optional)
    pattern: "*.removed.sites"
# Singletons, indel lengths, haplotype counts, Mendel errors and extracted
# FORMAT / INFO fields.
# NOTE(review): the key keeps its original spelling because it presumably
# mirrors the module's emit channel name; confirm against main.nf before
# renaming. Only the file suffix in the pattern is corrected.
- singeltons:
    type: file
    description: Location of singletons, and the individual they occur in (optional)
    pattern: "*.singletons"
- indel_hist:
    type: file
    description: Histogram file of the length of all indels (including SNPs) (optional)
    pattern: "*.indel_hist"
- hapcount:
    type: file
    description: Unique haplotypes within user specified bins (optional)
    pattern: "*.hapcount"
- mendel:
    type: file
    description: Mendel errors identified in trios (optional)
    pattern: "*.mendel"
- format:
    type: file
    description: >-
      Extracted information from the genotype fields in the VCF file relating
      to a specified FORMAT identifier (optional)
    pattern: "*.FORMAT"
- info:
    type: file
    description: Extracted information from the INFO field in the VCF file (optional)
    pattern: "*.INFO"
# 012 genotype matrix and its two index files (individuals, positions).
- genotypes_matrix:
    type: file
    description: |
      Genotypes output as large matrix.
      Genotypes of each individual on a separate line.
      Genotypes are represented as 0, 1 and 2, where the number represents the number of non-reference alleles.
      Missing genotypes are represented by -1 (optional)
    pattern: "*.012"
- genotypes_matrix_individual:
    type: file
    description: Details the individuals included in the main genotypes_matrix file (optional)
    pattern: "*.012.indv"
- genotypes_matrix_position:
    type: file
    description: Details the site locations included in the main genotypes_matrix file (optional)
    pattern: "*.012.pos"
# Exports for downstream tools: IMPUTE, LDhat and BEAGLE.
- impute_hap:
    type: file
    description: Phased haplotypes in IMPUTE reference-panel format (optional)
    pattern: "*.impute.hap"
- impute_hap_legend:
    type: file
    description: Impute haplotype legend file (optional)
    pattern: "*.impute.hap.legend"
- impute_hap_indv:
    type: file
    description: Impute haplotype individuals file (optional)
    pattern: "*.impute.hap.indv"
- ldhat_sites:
    type: file
    description: Output data in LDhat format, sites (optional)
    pattern: "*.ldhat.sites"
- ldhat_locs:
    type: file
    description: output data in LDhat format, locations (optional)
    pattern: "*.ldhat.locs"
# The two BEAGLE entries differ only in the FORMAT tag the likelihoods are
# read from (GL versus PL).
- beagle_gl:
    type: file
    description: Genotype likelihoods for biallelic sites, taken from the GL tag (optional)
    pattern: "*.BEAGLE.GL"
- beagle_pl:
    type: file
    description: Genotype likelihoods for biallelic sites, taken from the PL tag (optional)
    pattern: "*.BEAGLE.PL"
# PLINK exports: standard PED/MAP pair and transposed TPED/TFAM pair.
- ped:
    type: file
    description: output the genotype data in PLINK PED format (optional)
    pattern: "*.ped"
# Trailing underscore kept in the key as in the original.
- map_:
    type: file
    description: output the variant map file accompanying the PLINK PED file (optional)
    pattern: "*.map"
- tped:
    type: file
    description: output the genotype data in PLINK transposed PED (TPED) format (optional)
    pattern: "*.tped"
- tfam:
    type: file
    description: output the sample/family file accompanying the PLINK TPED file (optional)
    pattern: "*.tfam"
# Comparison outputs produced when a second variant file is supplied.
- diff_sites_in_files:
    type: file
    description: Sites that are common / unique to each file specified in optional inputs (optional)
    pattern: "*.diff.sites.in.files"
- diff_indv_in_files:
    type: file
    description: Individuals that are common / unique to each file specified in optional inputs (optional)
    pattern: "*.diff.indv.in.files"
- diff_sites:
    type: file
    description: Discordance on a site by site basis, specified in optional inputs (optional)
    pattern: "*.diff.sites"
- diff_indv:
    type: file
    description: Discordance on an individual by individual basis, specified in optional inputs (optional)
    pattern: "*.diff.indv"
- diff_discd_matrix:
    type: file
    description: Discordance matrix between files specified in optional inputs (optional)
    pattern: "*.diff.discordance.matrix"
- diff_switch_error:
    type: file
    description: Switch errors found between sites (optional)
    pattern: "*.diff.switch"
# Module authors. Each handle is quoted because a plain YAML scalar cannot
# begin with "@".
authors:
- "@Mark-S-Hill"