From de0d57a5623ecb81d1bbc7ad73b5a8754b903d4c Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Mon, 7 Mar 2022 18:02:40 +0000 Subject: [PATCH] implement plink2/score module (#1259) * implement plink2/score module * fix test yml * fix typo :( * set cpu * set mem * fix input process input block * fix tests Co-authored-by: Sateesh <33637490+sateeshperi@users.noreply.github.com> --- modules/plink2/score/main.nf | 39 +++++++++++++++ modules/plink2/score/meta.yml | 56 ++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/config/test_data.config | 3 +- tests/modules/plink2/score/main.nf | 24 ++++++++++ tests/modules/plink2/score/nextflow.config | 15 ++++++ tests/modules/plink2/score/test.yml | 16 +++++++ 7 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 modules/plink2/score/main.nf create mode 100644 modules/plink2/score/meta.yml create mode 100644 tests/modules/plink2/score/main.nf create mode 100644 tests/modules/plink2/score/nextflow.config create mode 100644 tests/modules/plink2/score/test.yml diff --git a/modules/plink2/score/main.nf b/modules/plink2/score/main.nf new file mode 100644 index 00000000..6f561322 --- /dev/null +++ b/modules/plink2/score/main.nf @@ -0,0 +1,39 @@ +process PLINK2_SCORE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::plink2=2.00a2.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/plink2:2.00a2.3--h712d239_1' : + 'quay.io/biocontainers/plink2:2.00a2.3--h712d239_1' }" + + input: + tuple val(meta), path(pgen), path(psam), path(pvar) + path(scorefile) + + output: + tuple val(meta), path("*.sscore"), emit: score + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def mem_mb = task.memory.toMega() // plink is greedy + """ + plink2 \\ + --threads $task.cpus \\ + --memory $mem_mb \\ + --pfile ${pgen.baseName} vzs \\ + --score ${scorefile} \\ + $args \\ + --out ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/plink2/score/meta.yml b/modules/plink2/score/meta.yml new file mode 100644 index 00000000..5dad6259 --- /dev/null +++ b/modules/plink2/score/meta.yml @@ -0,0 +1,56 @@ +name: plink2_score +description: Apply a scoring system to each sample in a plink 2 fileset +keywords: + - plink2 + - score +tools: + - plink2: + description: | + Whole genome association analysis toolset, designed to perform a range + of basic, large-scale analyses in a computationally efficient manner + homepage: http://www.cog-genomics.org/plink/2.0/ + documentation: http://www.cog-genomics.org/plink/2.0/general_usage + tool_dev_url: None + doi: "10.1186/s13742-015-0047-8" + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pgen: + type: file + description: PLINK 2 binary genotype table + pattern: "*.{pgen}" + - psam: + type: file + description: PLINK 2 sample information file + pattern: "*.{psam}" + - pvar: + type: file + description: PLINK 2 variant information file + pattern: "*.{pvar}" + - scorefile: + type: file + description: A text file containing variant identifiers and weights + pattern: "*.{scores,txt,scorefile}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - score: + type: file + description: A text file containing sample scores, in plink 2 .sscore format + pattern: "*.{sscore}" + +authors: + - "@nebfield" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 553128de..d6575ff1 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1273,6 +1273,10 @@ plink2/extract: - modules/plink2/extract/** - tests/modules/plink2/extract/** +plink2/score: + - modules/plink2/score/** + - tests/modules/plink2/score/** + plink2/vcf: - modules/plink2/vcf/** - tests/modules/plink2/vcf/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index dda10192..ce4f7ae8 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -119,7 +119,7 @@ params { genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi" transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta" genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta" - genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz" + genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz" genome_21_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" genome_21_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" genome_21_dict = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" @@ -138,6 +138,7 @@ params { mills_and_1000g_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" syntheticvcf_short_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz" syntheticvcf_short_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz.tbi" + syntheticvcf_short_score = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.score" gnomad_r2_1_1_sv_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz" hapmap_3_3_hg38_21_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz" diff --git a/tests/modules/plink2/score/main.nf b/tests/modules/plink2/score/main.nf new file mode 100644 index 00000000..6a09e829 --- /dev/null +++ b/tests/modules/plink2/score/main.nf @@ -0,0 +1,24 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PLINK2_VCF } from '../../../../modules/plink2/vcf/main.nf' +include { PLINK2_SCORE } from '../../../../modules/plink2/score/main.nf' + +workflow test_plink2_score { + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['genome']['syntheticvcf_short_vcf_gz'], checkIfExists: true) + ] + PLINK2_VCF ( input ) + + scorefile = file(params.test_data['homo_sapiens']['genome']['syntheticvcf_short_score'], checkIfExists: true) + + PLINK2_VCF.out.pgen + .concat(PLINK2_VCF.out.psam, PLINK2_VCF.out.pvar) + .groupTuple() + .map { it.flatten() } + .set { ch_target_genome } + + PLINK2_SCORE ( ch_target_genome, scorefile ) +} diff --git a/tests/modules/plink2/score/nextflow.config b/tests/modules/plink2/score/nextflow.config new file mode 100644 index 00000000..083e4666 --- /dev/null +++ b/tests/modules/plink2/score/nextflow.config @@ -0,0 +1,15 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + // relabel input variants to a common scheme chr:pos:alt:ref + withName: PLINK2_VCF { + ext.args = '--set-missing-var-ids @:#:\\$1:\\$2' + } + + // scoring really needs an adjustment for small test dataset (n > 50 + // normally) + withName: PLINK2_SCORE { + ext.args = 'no-mean-imputation' + } +} diff --git a/tests/modules/plink2/score/test.yml b/tests/modules/plink2/score/test.yml new file mode 100644 index 00000000..7993cb34 --- /dev/null +++ b/tests/modules/plink2/score/test.yml @@ -0,0 +1,16 @@ +- name: plink2 score test_plink2_score + command: nextflow run tests/modules/plink2/score -entry test_plink2_score -c tests/config/nextflow.config + tags: + - plink2 + - plink2/score + files: + - path: output/plink2/test.pgen + md5sum: fac12ca9041d6950f6b7d60ac2120721 + - path: output/plink2/test.psam + md5sum: e6c714488754cb8448c3dfda08c4c0ea + - path: output/plink2/test.pvar.zst + md5sum: 98d59e9779a8b62d5032cd98b642a63b + - path: output/plink2/test.sscore + md5sum: 97bde840f69febd65f2c00e9243126e9 + - path: output/plink2/versions.yml + md5sum: 71499ab14e1583c88ced3a7a4f05bfa7