mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-21 18:58:16 +00:00
New module: plink2/extract
(#1228)
* add plink2_extract * fix test yml path * Update modules/plink2/extract/main.nf Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> * Update modules/plink2/extract/main.nf Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> * compress output * add DOI * make outputs less ambiguous * update test for compressed output * brain is dumb * Update modules/plink2/extract/main.nf Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
parent
f112e4d701
commit
e687c7025a
6 changed files with 167 additions and 0 deletions
37
modules/plink2/extract/main.nf
Normal file
37
modules/plink2/extract/main.nf
Normal file
|
@ -0,0 +1,37 @@
|
|||
// Subset a PLINK 2 fileset (pgen/psam/pvar) to the variants listed in a text
// file, writing a new fileset named after `task.ext.prefix` (default: meta.id).
process PLINK2_EXTRACT {
    tag "$meta.id"
    label 'process_low'

    // Conda environment is only requested when params.enable_conda is set.
    conda (params.enable_conda ? "bioconda::plink2=2.00a2.3" : null)
    // Singularity image is used when the engine is singularity and
    // task.ext.singularity_pull_docker_container is not set; otherwise the
    // quay.io biocontainer is used.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/plink2:2.00a2.3--h712d239_1' :
        'quay.io/biocontainers/plink2:2.00a2.3--h712d239_1' }"

    input:
    // meta map, the three files of the input fileset, and the variant list.
    tuple val(meta), path(pgen), path(psam), path(pvar), path(variants)

    output:
    tuple val(meta), path("*.pgen")    , emit: extract_pgen
    tuple val(meta), path("*.psam")    , emit: extract_psam
    tuple val(meta), path("*.pvar.zst"), emit: extract_pvar
    path "versions.yml"                , emit: versions

    script:
    // Optional extra plink2 arguments and the output prefix, both taken from
    // task.ext with fallbacks ('' and meta.id respectively).
    def extra_args = task.ext.args ?: ''
    def out_prefix = task.ext.prefix ?: "${meta.id}"

    // Refuse to run when the output .pgen would carry the same name as the
    // staged input .pgen.
    def input_pgen_name  = "${pgen}"
    def output_pgen_name = "${out_prefix}.pgen"
    if (input_pgen_name == output_pgen_name) {
        error "Input and output names are the same, use \"task.ext.prefix\" in modules.config to disambiguate!"
    }

    // psam and pvar are staged but not named in the command; presumably plink2
    // locates them through the shared --pfile prefix (confirm against PLINK 2 docs).
    // `--make-pgen vzs` pairs with the declared *.pvar.zst output above.
    """
    plink2 \\
        --pfile ${pgen.baseName} \\
        ${extra_args} \\
        --threads ${task.cpus} \\
        --extract ${variants} \\
        --make-pgen vzs \\
        --out ${out_prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
    END_VERSIONS
    """
}
|
64
modules/plink2/extract/meta.yml
Normal file
64
modules/plink2/extract/meta.yml
Normal file
|
@ -0,0 +1,64 @@
|
|||
name: plink2_extract
|
||||
description: Subset plink pfiles with a text file of variant identifiers
|
||||
keywords:
|
||||
- plink2
|
||||
- extract
|
||||
tools:
|
||||
- plink2:
|
||||
description: |
|
||||
Whole genome association analysis toolset, designed to perform a range
|
||||
of basic, large-scale analyses in a computationally efficient manner
|
||||
homepage: http://www.cog-genomics.org/plink/2.0/
|
||||
documentation: http://www.cog-genomics.org/plink/2.0/general_usage
|
||||
tool_dev_url: None
|
||||
doi: "10.1186/s13742-015-0047-8"
|
||||
licence: ['GPL v3']
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- pgen:
|
||||
type: file
|
||||
description: PLINK 2 binary genotype table
|
||||
pattern: "*.{pgen}"
|
||||
- psam:
|
||||
type: file
|
||||
description: PLINK 2 sample information file
|
||||
pattern: "*.{psam}"
|
||||
- pvar:
|
||||
type: file
|
||||
description: PLINK 2 variant information file
|
||||
pattern: "*.{pvar}"
|
||||
- variants:
|
||||
type: file
|
||||
description: A text file containing variant identifiers to keep (one per line)
|
||||
pattern: "*.{keep}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- extract_pgen:
|
||||
type: file
|
||||
description: PLINK 2 binary genotype table, containing extracted variants
|
||||
pattern: "*.{pgen}"
|
||||
- extract_psam:
|
||||
type: file
|
||||
description: PLINK 2 sample information file associated with the extracted data
|
||||
pattern: "*.{psam}"
|
||||
- extract_pvar:
|
||||
type: file
|
||||
description: PLINK 2 variant information file, containing extracted variants
|
||||
pattern: "*.{pvar.zst}"
|
||||
|
||||
authors:
|
||||
- "@nebfield"
|
|
@ -1149,6 +1149,10 @@ plink/vcf:
|
|||
- modules/plink/vcf/**
|
||||
- tests/modules/plink/vcf/**
|
||||
|
||||
plink2/extract:
|
||||
- modules/plink2/extract/**
|
||||
- tests/modules/plink2/extract/**
|
||||
|
||||
plink2/vcf:
|
||||
- modules/plink2/vcf/**
|
||||
- tests/modules/plink2/vcf/**
|
||||
|
|
30
tests/modules/plink2/extract/main.nf
Normal file
30
tests/modules/plink2/extract/main.nf
Normal file
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { PLINK2_VCF } from '../../../../modules/plink2/vcf/main.nf'
|
||||
include { PLINK2_EXTRACT } from '../../../../modules/plink2/extract/main.nf'
|
||||
|
||||
// Test entry point: convert a synthetic VCF to a PLINK 2 fileset with
// PLINK2_VCF, derive a variant list from the resulting .pvar, then run
// PLINK2_EXTRACT on the fileset plus that list.
workflow test_plink2_extract {

    vcf_input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['genome']['syntheticvcf_short_vcf_gz'], checkIfExists: true)
    ]
    PLINK2_VCF ( vcf_input )

    // Split the .pvar into 10-line chunks (file name taken from
    // 'variants.keep', header not repeated) and keep only the last chunk
    // as the list of variants to extract.
    ch_variants = PLINK2_VCF.out.pvar
        .splitText(file: 'variants.keep', keepHeader: false, by: 10)
        .last()

    ch_variants.view()

    // Concatenate pgen, psam, pvar and the variant list in that order, then
    // group them into one tuple per key. Assumes each PLINK2_VCF output is a
    // [meta, file] pair so that groupTuple keys on meta; verify against the
    // PLINK2_VCF module.
    ch_extract = PLINK2_VCF.out.pgen
        .concat(PLINK2_VCF.out.psam, PLINK2_VCF.out.pvar, ch_variants)
        .groupTuple()
        .map { meta, paths ->
            def pgen     = paths[0]
            def psam     = paths[1]
            def pvar     = paths[2]
            def variants = paths[3]
            [meta, pgen, psam, pvar, variants]
        }

    PLINK2_EXTRACT ( ch_extract )
}
|
12
tests/modules/plink2/extract/nextflow.config
Normal file
12
tests/modules/plink2/extract/nextflow.config
Normal file
|
@ -0,0 +1,12 @@
|
|||
// Process-scope settings for the plink2/extract module test.
process {

    // Publish under <outdir>/<tool>, where <tool> is the lower-cased part of
    // the process name before the first underscore, taken after the last ':'.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

    // Arguments handed to the upstream VCF conversion step.
    withName: PLINK2_VCF {
        ext.args = '--make-pgen --set-missing-var-ids @:#:\\$1:\\$2'
    }

    // Give the extracted fileset its own prefix so its names differ from the
    // input fileset (PLINK2_EXTRACT errors out when the .pgen names match).
    withName: PLINK2_EXTRACT {
        ext.prefix = { "${meta.id}.extract" }
    }
}
|
20
tests/modules/plink2/extract/test.yml
Normal file
20
tests/modules/plink2/extract/test.yml
Normal file
|
@ -0,0 +1,20 @@
|
|||
- name: plink2 extract test_plink2_extract
|
||||
command: nextflow run tests/modules/plink2/extract -entry test_plink2_extract -c tests/config/nextflow.config
|
||||
tags:
|
||||
- plink2/extract
|
||||
- plink2
|
||||
files:
|
||||
- path: output/plink2/test.extract.pgen
|
||||
md5sum: 785e729a293ecabb0d39394865316bda
|
||||
- path: output/plink2/test.extract.psam
|
||||
md5sum: e6c714488754cb8448c3dfda08c4c0ea
|
||||
- path: output/plink2/test.extract.pvar.zst
|
||||
md5sum: 076767e6695e681115eabb924a447ee9
|
||||
- path: output/plink2/test.pgen
|
||||
md5sum: fac12ca9041d6950f6b7d60ac2120721
|
||||
- path: output/plink2/test.psam
|
||||
md5sum: e6c714488754cb8448c3dfda08c4c0ea
|
||||
- path: output/plink2/test.pvar
|
||||
md5sum: ff9e44f8e5f4035d8cf2bfe7be6755b3
|
||||
- path: output/plink2/versions.yml
|
||||
md5sum: c477b7c9f6e39b89710fe1a0bceee50d
|
Loading…
Reference in a new issue