Merge branch 'nf-core:master' into gamma-update

This commit is contained in:
Jill V. Hagey, PhD 2022-05-24 19:28:06 -04:00 committed by GitHub
commit b1dd1d8a52
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 366 additions and 0 deletions

View file

@ -0,0 +1,61 @@
// Nextflow process wrapping `bcftools roh`.
//
// Runs `bcftools roh` on the staged VCF and writes the result to `<prefix>.roh`,
// together with a `versions.yml` that records the bcftools version.
// The five standalone `path` inputs are optional: each one is converted into its
// command-line flag only when the value is truthy, otherwise the flag is left out.
process BCFTOOLS_ROH {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0':
        'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }"

    input:
    // `tbi` is staged next to the VCF but is never referenced in the command below
    // (presumably so bcftools can locate the index on its own; confirm).
    tuple val(meta), path(vcf), path(tbi)
    path af_file
    path genetic_map
    path regions_file
    path samples_file
    path targets_file

    output:
    tuple val(meta), path("*.roh"), emit: roh
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args        = task.ext.args ?: ''
    def prefix      = task.ext.prefix ?: "${meta.id}"
    // One optional flag per auxiliary file; an empty string drops the flag entirely.
    def af_opt      = af_file      ? "--AF-file ${af_file}"           : ''
    def map_opt     = genetic_map  ? "--genetic-map ${genetic_map}"   : ''
    def regions_opt = regions_file ? "--regions-file ${regions_file}" : ''
    def samples_opt = samples_file ? "--samples-file ${samples_file}" : ''
    def targets_opt = targets_file ? "--targets-file ${targets_file}" : ''
    """
    bcftools \\
        roh \\
        ${args} \\
        ${af_opt} \\
        ${map_opt} \\
        ${regions_opt} \\
        ${samples_opt} \\
        ${targets_opt} \\
        -o ${prefix}.roh \\
        ${vcf}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // Stub run: create an empty output file with the expected name and still report the version.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.roh

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,55 @@
# Module metadata for the bcftools roh process: documents every input and
# output channel element declared by the process definition.
name: "bcftools_roh"
description: A program for detecting runs of homo/autozygosity. Only bi-allelic sites are considered.
keywords:
  - roh
tools:
  - "roh":
      description: "A program for detecting runs of homo/autozygosity. Only bi-allelic sites are considered."
      homepage: https://www.htslib.org/
      documentation: http://www.htslib.org/doc/bcftools.html
      doi: 10.1093/bioinformatics/btp352
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: VCF file
      # No leading dot inside the braces: "*." already supplies it.
      pattern: "*.{vcf,vcf.gz}"
  # The process takes the index as the third element of the input tuple.
  - tbi:
      type: file
      description: Index of the VCF file
      pattern: "*.tbi"
  - af_file:
      type: file
      description: "Read allele frequencies from a tab-delimited file containing the columns: CHROM\tPOS\tREF,ALT\tAF."
  - genetic_map:
      type: file
      description: "Genetic map in the format required also by IMPUTE2."
  - regions_file:
      type: file
      description: "Regions can be specified either on command line or in a VCF, BED, or tab-delimited file (the default)."
  - samples_file:
      type: file
      description: "File of sample names to include or exclude if prefixed with '^'."
  - targets_file:
      type: file
      description: "Targets can be specified either on command line or in a VCF, BED, or tab-delimited file (the default)."
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - roh:
      type: file
      description: Contains site-specific and/or per-region runs of homo/autozygosity calls.
      pattern: "*.roh"
authors:
  - "@ramprasadn"

42
modules/kat/hist/main.nf Normal file
View file

@ -0,0 +1,42 @@
// Nextflow process wrapping `kat hist`.
//
// Runs `kat hist` on the staged reads with `<prefix>.hist` as the output prefix.
// The histogram and its `dist_analysis.json` summary are required outputs; the
// plot files (png/ps/pdf) and the `*-hash.jf*` file are declared optional, so
// the task does not fail when they are absent. Also writes `versions.yml`.
process KAT_HIST {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::kat=2.4.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2':
        'quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.hist")                   , emit: hist
    tuple val(meta), path("*.hist.dist_analysis.json"), emit: json
    tuple val(meta), path("*.png")                    , emit: png           , optional: true
    tuple val(meta), path("*.ps")                     , emit: ps            , optional: true
    tuple val(meta), path("*.pdf")                    , emit: pdf           , optional: true
    tuple val(meta), path("*-hash.jf*")               , emit: jellyfish_hash, optional: true
    path "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The stray debugging `ls -l` that followed the kat call was removed: it only
    // added noise to the task log and produced nothing the outputs depend on.
    """
    kat hist \\
        --threads $task.cpus \\
        --output_prefix ${prefix}.hist \\
        $args \\
        $reads

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kat: \$( kat hist --version | sed 's/kat //' )
    END_VERSIONS
    """
}

64
modules/kat/hist/meta.yml Normal file
View file

@ -0,0 +1,64 @@
# Module metadata for the kat hist process: documents every input and output
# channel element declared by the process definition.
name: "kat_hist"
description: Creates a histogram of the number of distinct k-mers having a given frequency.
keywords:
  - k-mer
  - histogram
  - count
tools:
  - "kat":
      description: "KAT is a suite of tools that analyse jellyfish hashes or sequence files (fasta or fastq) using kmer counts"
      homepage: https://www.earlham.ac.uk/kat-tools
      documentation: https://kat.readthedocs.io/en/latest/index.html
      tool_dev_url: https://github.com/TGAC/KAT
      # A bare DOI, not the journal landing-page URL.
      doi: 10.1093/bioinformatics/btw663
      # A real YAML list, not a string containing a list literal.
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - hist:
      type: file
      description: KAT histogram of k-mer counts
      pattern: "*.hist"
  - json:
      type: file
      description: KAT histogram summary of distance analysis
      pattern: "*.hist.dist_analysis.json"
  - png:
      type: file
      description: KAT plot of k-mer histogram in PNG format
      pattern: "*.png"
  - ps:
      type: file
      description: KAT plot of k-mer histogram in PS format
      pattern: "*.ps"
  - pdf:
      type: file
      description: KAT plot of k-mer histogram in PDF format
      pattern: "*.pdf"
  - jellyfish_hash:
      type: file
      description: Jellyfish hash file
      # Matches the process output glob (e.g. test.hist-hash.jf27).
      pattern: "*-hash.jf*"
authors:
  - "@mahesh-panchal"

View file

@ -166,6 +166,10 @@ bcftools/reheader:
- modules/bcftools/reheader/**
- tests/modules/bcftools/reheader/**
# Entry for the bcftools/roh tag: path globs covering the module's sources and
# its tests (presumably used to select which tests run when these paths
# change; confirm against the CI configuration).
bcftools/roh:
- modules/bcftools/roh/**
- tests/modules/bcftools/roh/**
bcftools/sort:
- modules/bcftools/sort/**
- tests/modules/bcftools/sort/**
@ -1089,6 +1093,10 @@ kallistobustools/ref:
- modules/kallistobustools/ref/**
- tests/modules/kallistobustools/ref/**
# Entry for the kat/hist tag: path globs covering the module's sources and its
# tests (presumably used to select which tests run when these paths change;
# confirm against the CI configuration).
kat/hist:
- modules/kat/hist/**
- tests/modules/kat/hist/**
khmer/normalizebymedian:
- modules/khmer/normalizebymedian/**
- tests/modules/khmer/normalizebymedian/**

View file

@ -0,0 +1,35 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BCFTOOLS_ROH } from '../../../../modules/bcftools/roh/main.nf'
// Runs BCFTOOLS_ROH on the SARS-CoV-2 test VCF with every optional file left empty.
workflow test_bcftools_roh {

    def meta   = [ id:'test' ] // meta map
    def vcf_gz = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)
    def index  = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)

    BCFTOOLS_ROH (
        [ meta, vcf_gz, index ],
        [], // af_file
        [], // genetic_map
        [], // regions_file
        [], // samples_file
        []  // targets_file
    )
}
// Entry point for the stub test case: same inputs as the regular test, again with
// all optional files empty. Whether the stub block is executed depends on how
// Nextflow is invoked, not on anything in this workflow.
workflow test_bcftools_roh_stub {

    def vcf_and_index = [
        file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
    ]
    def sample = [ [ id:'test' ] ] + vcf_and_index // [ meta map, vcf, tbi ]

    // Optional inputs, in order: af_file, genetic_map, regions_file, samples_file, targets_file
    BCFTOOLS_ROH ( sample, [], [], [], [], [] )
}

View file

@ -0,0 +1,5 @@
// Publish each task's outputs under <outdir>/<tool>, where <tool> is derived from
// the process name: keep the last ':'-separated component, then its first
// '_'-separated token, lower-cased (e.g. BCFTOOLS_ROH -> bcftools).
process {

    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }

}

View file

@ -0,0 +1,17 @@
# Test cases for the bcftools/roh module. Each case runs one entry workflow and
# lists the files expected in the publish directory.
- name: "bcftools roh"
  command: nextflow run ./tests/modules/bcftools/roh -entry test_bcftools_roh -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/roh/nextflow.config
  tags:
    - "bcftools"
    - "bcftools/roh"
  files:
    - path: "output/bcftools/test.roh"
    - path: "output/bcftools/versions.yml"

# -stub-run makes Nextflow execute the process `stub:` block instead of
# `script:`; without it this case would just repeat the regular test.
- name: "bcftools roh stub"
  command: nextflow run ./tests/modules/bcftools/roh -entry test_bcftools_roh_stub -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/roh/nextflow.config -stub-run
  tags:
    - "bcftools"
    - "bcftools/roh"
  files:
    - path: "output/bcftools/test.roh"
    - path: "output/bcftools/versions.yml"

View file

@ -0,0 +1,28 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { KAT_HIST } from '../../../../modules/kat/hist/main.nf'
// Single-end case: one FastQ file paired with its meta map.
workflow test_kat_hist_single_end {

    def meta  = [ id:'test', single_end:true ] // meta map
    def fastq = file(params.test_data['homo_sapiens']['illumina']['test2_1_fastq_gz'], checkIfExists: true)

    KAT_HIST ( [ meta, fastq ] )
}
// Paired-end case: both mates are passed as a two-element list next to the meta map.
workflow test_kat_hist_paired_end {

    def meta   = [ id:'test', single_end:false ] // meta map
    def read_1 = file(params.test_data['homo_sapiens']['illumina']['test2_1_fastq_gz'], checkIfExists: true)
    def read_2 = file(params.test_data['homo_sapiens']['illumina']['test2_2_fastq_gz'], checkIfExists: true)

    KAT_HIST ( [ meta, [ read_1, read_2 ] ] )
}

View file

@ -0,0 +1,9 @@
// Test configuration for the kat/hist module.
process {

    // Publish under <outdir>/<tool>: last ':'-separated component of the process
    // name, first '_'-separated token, lower-cased (e.g. KAT_HIST -> kat).
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }

    // Only the single-end test workflow passes the extra '-d' argument to KAT_HIST.
    withName: 'test_kat_hist_single_end:KAT_HIST' {
        ext.args = '-d'
    }

}

View file

@ -0,0 +1,42 @@
# Test cases for the kat/hist module. Each case runs one entry workflow and
# lists the files expected in the publish directory, with checksums or content
# checks where they are stable.
- name: "kat hist test_kat_hist_single_end"
  command: nextflow run tests/modules/kat/hist -entry test_kat_hist_single_end -c tests/config/nextflow.config
  tags:
    - "kat/hist"
    - "kat"
  files:
    - path: "output/kat/test.hist"
      md5sum: "c6eba52b3a2653a684577a8ae20b74c1"
    - path: "output/kat/test.hist-hash.jf27"
    - path: "output/kat/test.hist.dist_analysis.json"
      # md5sum: 52a5a2d91c71b940f36f1f0a7fd5ef10 # This is variable for an unknown reason
      contains:
        - "nb_peaks"
        - "global_minima"
        - "global_maxima"
        - "mean_freq"
        - "est_genome_size"
        - "est_het_rate"
    - path: "output/kat/test.hist.png"
      md5sum: "49861ef1a265e0edde3550b39c64a274"
    - path: "output/kat/versions.yml"

- name: "kat hist test_kat_hist_paired_end"
  command: nextflow run tests/modules/kat/hist -entry test_kat_hist_paired_end -c tests/config/nextflow.config
  tags:
    - "kat/hist"
    - "kat"
  files:
    - path: "output/kat/test.hist"
      md5sum: "91429091e74b1718051591d83a1ccb5d"
    - path: "output/kat/test.hist.dist_analysis.json"
      # md5sum: 8b0dabeaff4ba706b33aa8964d687e13 # This is variable for an unknown reason
      contains:
        - "nb_peaks"
        - "global_minima"
        - "global_maxima"
        - "mean_freq"
        - "est_genome_size"
        - "est_het_rate"
    - path: "output/kat/test.hist.png"
      md5sum: "e20774d0d2b979cb6ead7b7fb5ad36d9"
    - path: "output/kat/versions.yml"