mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
New module: plink2/extract
(#1228)
* add plink2_extract * fix test yml path * Update modules/plink2/extract/main.nf Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> * Update modules/plink2/extract/main.nf Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> * compress output * add DOI * make outputs less ambiguous * update test for compressed output * brain is dumb * Update modules/plink2/extract/main.nf Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
parent
f112e4d701
commit
e687c7025a
6 changed files with 167 additions and 0 deletions
37
modules/plink2/extract/main.nf
Normal file
37
modules/plink2/extract/main.nf
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
/*
 * PLINK2_EXTRACT
 *
 * Subsets a PLINK 2 fileset to the variants listed in a text file by running
 * `plink2 --extract`, and writes a new fileset named after `task.ext.prefix`
 * (falling back to `meta.id`).
 *
 * Input  : tuple of [ meta, pgen, psam, pvar, variants ]
 *            meta     - Groovy map with sample information; `meta.id` is used
 *                       for the task tag and the default output prefix
 *            pgen     - PLINK 2 binary genotype table
 *            psam     - PLINK 2 sample information file
 *            pvar     - PLINK 2 variant information file
 *            variants - text file with the variant identifiers to keep
 *
 * Output : extract_pgen - [ meta, *.pgen ]
 *          extract_psam - [ meta, *.psam ]
 *          extract_pvar - [ meta, *.pvar.zst ]  (requested via `--make-pgen vzs`)
 *          versions     - versions.yml recording the plink2 version
 *
 * Fails early when the output prefix would produce the same name as the
 * input .pgen, because the results could then not be told apart from the
 * staged inputs.
 */
process PLINK2_EXTRACT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::plink2=2.00a2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/plink2:2.00a2.3--h712d239_1' :
        'quay.io/biocontainers/plink2:2.00a2.3--h712d239_1' }"

    input:
    tuple val(meta), path(pgen), path(psam), path(pvar), path(variants)

    output:
    tuple val(meta), path("*.pgen")    , emit: extract_pgen
    tuple val(meta), path("*.psam")    , emit: extract_psam
    tuple val(meta), path("*.pvar.zst"), emit: extract_pvar
    path "versions.yml"                , emit: versions

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Guard against an output fileset that would shadow the staged input.
    if( "$pgen" == "${prefix}.pgen" ) error "Input and output names are the same, use \"task.ext.prefix\" in modules.config to disambiguate!"

    // Hand plink2 the memory granted to this task (in MiB) so that it does not
    // size its workspace from the host's total RAM. Nothing is added when no
    // memory directive is set, or when the caller already supplies --memory
    // through ext.args (a repeated flag would make plink2 fail).
    def memory_arg = ( task.memory && !args.toString().contains('--memory') )
        ? "--memory ${task.memory.toMega()}"
        : ''
    """
    plink2 \\
        --pfile ${pgen.baseName} \\
        $args \\
        --threads $task.cpus \\
        $memory_arg \\
        --extract $variants \\
        --make-pgen vzs \\
        --out ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
    END_VERSIONS
    """
}
|
64
modules/plink2/extract/meta.yml
Normal file
64
modules/plink2/extract/meta.yml
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
name: plink2_extract
|
||||||
|
description: Subset plink pfiles with a text file of variant identifiers
|
||||||
|
keywords:
|
||||||
|
- plink2
|
||||||
|
- extract
|
||||||
|
tools:
|
||||||
|
- plink2:
|
||||||
|
description: |
|
||||||
|
Whole genome association analysis toolset, designed to perform a range
|
||||||
|
of basic, large-scale analyses in a computationally efficient manner
|
||||||
|
homepage: http://www.cog-genomics.org/plink/2.0/
|
||||||
|
documentation: http://www.cog-genomics.org/plink/2.0/general_usage
|
||||||
|
tool_dev_url: None
|
||||||
|
doi: "10.1186/s13742-015-0047-8"
|
||||||
|
licence: ['GPL v3']
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- pgen:
|
||||||
|
type: file
|
||||||
|
description: PLINK 2 binary genotype table
|
||||||
|
pattern: "*.{pgen}"
|
||||||
|
- psam:
|
||||||
|
type: file
|
||||||
|
description: PLINK 2 sample information file
|
||||||
|
pattern: "*.{psam}"
|
||||||
|
- pvar:
|
||||||
|
type: file
|
||||||
|
description: PLINK 2 variant information file
|
||||||
|
pattern: "*.{pvar}"
|
||||||
|
- variants:
|
||||||
|
type: file
|
||||||
|
description: A text file containing variant identifiers to keep (one per line)
|
||||||
|
pattern: "*.{keep}"
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- extract_pgen:
|
||||||
|
type: file
|
||||||
|
description: PLINK 2 binary genotype table, containing extracted variants
|
||||||
|
pattern: "*.{pgen}"
|
||||||
|
- extract_psam:
|
||||||
|
type: file
|
||||||
|
description: PLINK 2 sample information file associated with the extracted data
|
||||||
|
pattern: "*.{psam}"
|
||||||
|
- extract_pvar:
|
||||||
|
type: file
|
||||||
|
description: PLINK 2 variant information file, containing extracted variants
|
||||||
|
pattern: "*.{pvar.zst}"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@nebfield"
|
|
@ -1149,6 +1149,10 @@ plink/vcf:
|
||||||
- modules/plink/vcf/**
|
- modules/plink/vcf/**
|
||||||
- tests/modules/plink/vcf/**
|
- tests/modules/plink/vcf/**
|
||||||
|
|
||||||
|
plink2/extract:
|
||||||
|
- modules/plink2/extract/**
|
||||||
|
- tests/modules/plink2/extract/**
|
||||||
|
|
||||||
plink2/vcf:
|
plink2/vcf:
|
||||||
- modules/plink2/vcf/**
|
- modules/plink2/vcf/**
|
||||||
- tests/modules/plink2/vcf/**
|
- tests/modules/plink2/vcf/**
|
||||||
|
|
30
tests/modules/plink2/extract/main.nf
Normal file
30
tests/modules/plink2/extract/main.nf
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { PLINK2_VCF } from '../../../../modules/plink2/vcf/main.nf'
|
||||||
|
include { PLINK2_EXTRACT } from '../../../../modules/plink2/extract/main.nf'
|
||||||
|
|
||||||
|
// Integration test for PLINK2_EXTRACT:
//   1. convert a small synthetic VCF into a PLINK 2 fileset with PLINK2_VCF,
//   2. carve a list of variants out of the resulting .pvar,
//   3. run PLINK2_EXTRACT on the fileset together with that list.
workflow test_plink2_extract {

    vcf_input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['genome']['syntheticvcf_short_vcf_gz'], checkIfExists: true)
    ]
    PLINK2_VCF ( vcf_input )

    // Split the .pvar into 10-line chunk files named after 'variants.keep'
    // and keep only the last chunk as the list of variants to extract.
    ch_variants = PLINK2_VCF.out.pvar
        .splitText(file: 'variants.keep', keepHeader: false, by: 10)
        .last()

    ch_variants.view()

    // Emit pgen, psam, pvar and the variant list in that order, gather them
    // under their shared meta key, then unpack the grouped files into the
    // positional tuple PLINK2_EXTRACT expects.
    ch_extract = PLINK2_VCF.out.pgen
        .concat(PLINK2_VCF.out.psam, PLINK2_VCF.out.pvar, ch_variants)
        .groupTuple()
        .map { sample_meta, files ->
            [ sample_meta, files[0], files[1], files[2], files[3] ]
        }

    PLINK2_EXTRACT ( ch_extract )
}
|
12
tests/modules/plink2/extract/nextflow.config
Normal file
12
tests/modules/plink2/extract/nextflow.config
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
// Process-level settings for the plink2/extract module test.
process {

    // Publish into a directory named after the first '_'-separated token of
    // the simple process name, lower-cased (PLINK2_EXTRACT -> plink2).
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Upstream conversion step: write a pgen fileset and assign IDs to
    // variants that have none.
    withName: PLINK2_VCF {
        ext.args = '--make-pgen --set-missing-var-ids @:#:\\$1:\\$2'
    }

    // Give the extracted fileset its own prefix so it does not share a name
    // with the fileset produced by PLINK2_VCF.
    withName: PLINK2_EXTRACT {
        ext.prefix = { "${meta.id}.extract" }
    }

}
|
20
tests/modules/plink2/extract/test.yml
Normal file
20
tests/modules/plink2/extract/test.yml
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
- name: plink2 extract test_plink2_extract
|
||||||
|
command: nextflow run tests/modules/plink2/extract -entry test_plink2_extract -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- plink2/extract
|
||||||
|
- plink2
|
||||||
|
files:
|
||||||
|
- path: output/plink2/test.extract.pgen
|
||||||
|
md5sum: 785e729a293ecabb0d39394865316bda
|
||||||
|
- path: output/plink2/test.extract.psam
|
||||||
|
md5sum: e6c714488754cb8448c3dfda08c4c0ea
|
||||||
|
- path: output/plink2/test.extract.pvar.zst
|
||||||
|
md5sum: 076767e6695e681115eabb924a447ee9
|
||||||
|
- path: output/plink2/test.pgen
|
||||||
|
md5sum: fac12ca9041d6950f6b7d60ac2120721
|
||||||
|
- path: output/plink2/test.psam
|
||||||
|
md5sum: e6c714488754cb8448c3dfda08c4c0ea
|
||||||
|
- path: output/plink2/test.pvar
|
||||||
|
md5sum: ff9e44f8e5f4035d8cf2bfe7be6755b3
|
||||||
|
- path: output/plink2/versions.yml
|
||||||
|
md5sum: c477b7c9f6e39b89710fe1a0bceee50d
|
Loading…
Reference in a new issue