mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
implement plink2/score module (#1259)
* implement plink2/score module * fix test yml * fix typo :( * set cpu * set mem * fix input process input block * fix tests Co-authored-by: Sateesh <33637490+sateeshperi@users.noreply.github.com>
This commit is contained in:
parent
251015c8ba
commit
de0d57a562
7 changed files with 156 additions and 1 deletions
39
modules/plink2/score/main.nf
Normal file
39
modules/plink2/score/main.nf
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
//
// PLINK2_SCORE: apply a scoring file to every sample of a PLINK 2 fileset
// with `plink2 --score`.
//
// Inputs
//   meta      - Groovy map of sample information; meta.id is used as the task
//               tag and as the default output prefix.
//   pgen      - PLINK 2 genotype file. Only its base name is handed to
//               `--pfile`, so psam/pvar are staged next to it and are expected
//               to share that prefix.
//   psam      - sample information file belonging to the same fileset.
//   pvar      - variant information file belonging to the same fileset. The
//               `vzs` modifier is hard-coded below, so a zstd-compressed
//               `<prefix>.pvar.zst` is expected.
//   scorefile - text file given to `--score`.
//
// Outputs
//   score     - tuple of meta and the `*.sscore` file(s) written by plink2.
//   versions  - versions.yml recording the plink2 version.
//
process PLINK2_SCORE {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::plink2=2.00a2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/plink2:2.00a2.3--h712d239_1' :
        'quay.io/biocontainers/plink2:2.00a2.3--h712d239_1' }"

    input:
    tuple val(meta), path(pgen), path(psam), path(pvar)
    path(scorefile)

    output:
    tuple val(meta), path("*.sscore"), emit: score
    path("versions.yml")             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // plink is greedy, so cap its memory to what the task was given.
    // task.memory is null when no memory directive is configured for this
    // process; in that case skip --memory instead of failing on null.toMega().
    def mem_arg = task.memory ? "--memory ${task.memory.toMega()}" : ''
    // NOTE: $args must stay directly after --score so that --score modifiers
    // supplied through ext.args (e.g. 'no-mean-imputation') attach to it.
    """
    plink2 \\
        --threads $task.cpus \\
        $mem_arg \\
        --pfile ${pgen.baseName} vzs \\
        --score ${scorefile} \\
        $args \\
        --out ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
    END_VERSIONS
    """
}
|
56
modules/plink2/score/meta.yml
Normal file
56
modules/plink2/score/meta.yml
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
# Module metadata for PLINK2_SCORE (modules/plink2/score/main.nf).
name: plink2_score
description: Apply a scoring system to each sample in a plink 2 fileset
keywords:
  - plink2
  - score
tools:
  - plink2:
      description: |
        Whole genome association analysis toolset, designed to perform a range
        of basic, large-scale analyses in a computationally efficient manner
      homepage: http://www.cog-genomics.org/plink/2.0/
      documentation: http://www.cog-genomics.org/plink/2.0/general_usage
      tool_dev_url: None
      doi: "10.1186/s13742-015-0047-8"
      licence: ['GPL v3']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - pgen:
      type: file
      description: PLINK 2 binary genotype table
      pattern: "*.{pgen}"
  - psam:
      type: file
      description: PLINK 2 sample information file
      pattern: "*.{psam}"
  # The process calls `--pfile <prefix> vzs`, so the variant file must be the
  # zstd-compressed `<prefix>.pvar.zst`.
  - pvar:
      type: file
      description: PLINK 2 variant information file (zstd-compressed)
      pattern: "*.{pvar.zst}"
  - scorefile:
      type: file
      description: A text file containing variant identifiers and weights
      pattern: "*.{scores,txt,scorefile}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - score:
      type: file
      description: A text file containing sample scores, in plink 2 .sscore format
      pattern: "*.{sscore}"

authors:
  - "@nebfield"
|
|
@ -1273,6 +1273,10 @@ plink2/extract:
|
||||||
- modules/plink2/extract/**
|
- modules/plink2/extract/**
|
||||||
- tests/modules/plink2/extract/**
|
- tests/modules/plink2/extract/**
|
||||||
|
|
||||||
|
plink2/score:
|
||||||
|
- modules/plink2/score/**
|
||||||
|
- tests/modules/plink2/score/**
|
||||||
|
|
||||||
plink2/vcf:
|
plink2/vcf:
|
||||||
- modules/plink2/vcf/**
|
- modules/plink2/vcf/**
|
||||||
- tests/modules/plink2/vcf/**
|
- tests/modules/plink2/vcf/**
|
||||||
|
|
|
@ -119,7 +119,7 @@ params {
|
||||||
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
|
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
|
||||||
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
||||||
genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta"
|
genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta"
|
||||||
genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz"
|
genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz"
|
||||||
genome_21_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta"
|
genome_21_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta"
|
||||||
genome_21_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai"
|
genome_21_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai"
|
||||||
genome_21_dict = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict"
|
genome_21_dict = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict"
|
||||||
|
@ -138,6 +138,7 @@ params {
|
||||||
mills_and_1000g_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi"
|
mills_and_1000g_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi"
|
||||||
syntheticvcf_short_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz"
|
syntheticvcf_short_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz"
|
||||||
syntheticvcf_short_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz.tbi"
|
syntheticvcf_short_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz.tbi"
|
||||||
|
syntheticvcf_short_score = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.score"
|
||||||
gnomad_r2_1_1_sv_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz"
|
gnomad_r2_1_1_sv_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz"
|
||||||
|
|
||||||
hapmap_3_3_hg38_21_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz"
|
hapmap_3_3_hg38_21_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz"
|
||||||
|
|
24
tests/modules/plink2/score/main.nf
Normal file
24
tests/modules/plink2/score/main.nf
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { PLINK2_VCF } from '../../../../modules/plink2/vcf/main.nf'
|
||||||
|
include { PLINK2_SCORE } from '../../../../modules/plink2/score/main.nf'
|
||||||
|
|
||||||
|
// End-to-end test: convert a small VCF to a PLINK 2 fileset with PLINK2_VCF,
// then score it with PLINK2_SCORE.
workflow test_plink2_score {
    vcf_input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['genome']['syntheticvcf_short_vcf_gz'], checkIfExists: true)
    ]
    scorefile = file(params.test_data['homo_sapiens']['genome']['syntheticvcf_short_score'], checkIfExists: true)

    PLINK2_VCF ( vcf_input )

    // Collect the three PLINK2_VCF outputs into one tuple per sample.
    // concat emits pgen, then psam, then pvar; groupTuple gathers them under
    // their shared first element and flatten unnests the grouped files, giving
    // the tuple shape PLINK2_SCORE takes: meta, pgen, psam, pvar.
    ch_target_genome = PLINK2_VCF.out.pgen
        .concat(PLINK2_VCF.out.psam, PLINK2_VCF.out.pvar)
        .groupTuple()
        .map { grouped -> grouped.flatten() }

    PLINK2_SCORE ( ch_target_genome, scorefile )
}
|
15
tests/modules/plink2/score/nextflow.config
Normal file
15
tests/modules/plink2/score/nextflow.config
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
process {

    // Publish each process' results under <outdir>/<tool>, where <tool> is the
    // lower-cased text before the first '_' of the unqualified process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

    // Give variants that lack an ID one built from the template @:#:$1:$2
    // (chromosome:position:allele:allele), so IDs follow a common scheme.
    // NOTE(review): the original note described the scheme as chr:pos:alt:ref;
    // confirm what $1/$2 mean in the plink2 documentation.
    withName: 'PLINK2_VCF' {
        ext.args = '--set-missing-var-ids @:#:\\$1:\\$2'
    }

    // The test dataset is small (the original note says n > 50 is normally
    // expected), so scoring is run with the 'no-mean-imputation' modifier.
    withName: 'PLINK2_SCORE' {
        ext.args = 'no-mean-imputation'
    }

}
|
16
tests/modules/plink2/score/test.yml
Normal file
16
tests/modules/plink2/score/test.yml
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
# pytest-workflow spec for the plink2/score module test: runs the
# test_plink2_score entry workflow and checks the published files by md5sum.
- name: plink2 score test_plink2_score
  command: nextflow run tests/modules/plink2/score -entry test_plink2_score -c tests/config/nextflow.config
  tags: [plink2, plink2/score]
  files:
    # PLINK 2 fileset published by the PLINK2_VCF step
    - { path: output/plink2/test.pgen, md5sum: fac12ca9041d6950f6b7d60ac2120721 }
    - { path: output/plink2/test.psam, md5sum: e6c714488754cb8448c3dfda08c4c0ea }
    - { path: output/plink2/test.pvar.zst, md5sum: 98d59e9779a8b62d5032cd98b642a63b }
    # per-sample scores written by PLINK2_SCORE
    - { path: output/plink2/test.sscore, md5sum: 97bde840f69febd65f2c00e9243126e9 }
    - { path: output/plink2/versions.yml, md5sum: 71499ab14e1583c88ced3a7a4f05bfa7 }
|
Loading…
Reference in a new issue