mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
New module: NGSCheckMate (#1290)
NGSCheckMate ncm mode, working on bam files and vcf files to check that (human) samples match as expected. Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk>. Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>.
This commit is contained in:
parent
62da45b0e1
commit
79a9d5e1ea
6 changed files with 236 additions and 0 deletions
49
modules/ngscheckmate/ncm/main.nf
Normal file
49
modules/ngscheckmate/ncm/main.nf
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
process NGSCHECKMATE_NCM {
|
||||||
|
label 'process_low'
|
||||||
|
|
||||||
|
conda (params.enable_conda ? "bioconda::ngscheckmate=1.0.0" : null)
|
||||||
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/ngscheckmate:1.0.0--py27r41hdfd78af_3':
|
||||||
|
'quay.io/biocontainers/ngscheckmate:1.0.0--py27r41hdfd78af_3' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
path files
|
||||||
|
path snp_bed
|
||||||
|
path fasta
|
||||||
|
|
||||||
|
output:
|
||||||
|
path "*.pdf" , emit: pdf
|
||||||
|
path "*_corr_matrix.txt", emit: corr_matrix
|
||||||
|
path "*_matched.txt" , emit: matched
|
||||||
|
path "*_all.txt" , emit: all
|
||||||
|
path "*.vcf" , emit: vcfs, optional: true
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
def args = task.ext.args ?: ''
|
||||||
|
def prefix = task.ext.prefix ?: "output"
|
||||||
|
def unzip = files.any { it.toString().endsWith(".vcf.gz") }
|
||||||
|
"""
|
||||||
|
if $unzip
|
||||||
|
then
|
||||||
|
for VCFGZ in *.vcf.gz; do
|
||||||
|
gunzip -cdf \$VCFGZ > \$( basename \$VCFGZ .gz );
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
NCM_REF="./"${fasta} ncm.py -d . -bed ${snp_bed} -O . -N ${prefix} $args
|
||||||
|
|
||||||
|
if $unzip
|
||||||
|
then
|
||||||
|
rm -f *.vcf # clean up decompressed vcfs
|
||||||
|
fi
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g")
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
64
modules/ngscheckmate/ncm/meta.yml
Normal file
64
modules/ngscheckmate/ncm/meta.yml
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
name: ngscheckmate_ncm
|
||||||
|
description: Determining whether sequencing data comes from the same individual by using SNP matching. Designed for humans on vcf or bam files.
|
||||||
|
keywords:
|
||||||
|
- ngscheckmate
|
||||||
|
- matching
|
||||||
|
- snp
|
||||||
|
tools:
|
||||||
|
- ngscheckmate:
|
||||||
|
description: NGSCheckMate is a software package for identifying next generation sequencing (NGS) data files from the same individual, including matching between DNA and RNA.
|
||||||
|
homepage: https://github.com/parklab/NGSCheckMate
|
||||||
|
documentation: https://github.com/parklab/NGSCheckMate
|
||||||
|
tool_dev_url: https://github.com/parklab/NGSCheckMate
|
||||||
|
doi: "10.1093/nar/gkx193"
|
||||||
|
licence: ['MIT']
|
||||||
|
|
||||||
|
input:
|
||||||
|
- files:
|
||||||
|
type: file
|
||||||
|
description: VCF or BAM files for each sample, in a merged channel (possibly gzipped). BAM files require an index too.
|
||||||
|
pattern: "*.{vcf,vcf.gz,bam,bai}"
|
||||||
|
|
||||||
|
- snp_bed:
|
||||||
|
type: file
|
||||||
|
description: BED file containing the SNPs to analyse
|
||||||
|
pattern: "*.{bed}"
|
||||||
|
|
||||||
|
- fasta:
|
||||||
|
type: file
|
||||||
|
description: fasta file for the genome, only used in the bam mode
|
||||||
|
pattern: "*.{fasta,fa}"
|
||||||
|
|
||||||
|
output:
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
|
||||||
|
- pdf:
|
||||||
|
type: file
|
||||||
|
description: A pdf containing a dendrogram showing how the samples match up
|
||||||
|
pattern: "*.{pdf}"
|
||||||
|
|
||||||
|
- corr_matrix:
|
||||||
|
type: file
|
||||||
|
description: A text file containing the correlation matrix between each sample
|
||||||
|
pattern: "*corr_matrix.txt"
|
||||||
|
|
||||||
|
- matched:
|
||||||
|
type: file
|
||||||
|
description: A txt file containing only the samples that match with each other
|
||||||
|
pattern: "*matched.txt"
|
||||||
|
|
||||||
|
- all:
|
||||||
|
type: file
|
||||||
|
description: A txt file containing all the sample comparisons, whether they match or not
|
||||||
|
pattern: "*all.txt"
|
||||||
|
|
||||||
|
- vcfs:
|
||||||
|
type: file
|
||||||
|
description: If run in bam mode, vcf files for each sample giving the SNP calls
|
||||||
|
pattern: "*.vcf"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@sppearce"
|
|
@ -1145,6 +1145,10 @@ ngmaster:
|
||||||
- modules/ngmaster/**
|
- modules/ngmaster/**
|
||||||
- tests/modules/ngmaster/**
|
- tests/modules/ngmaster/**
|
||||||
|
|
||||||
|
ngscheckmate/ncm:
|
||||||
|
- modules/ngscheckmate/ncm/**
|
||||||
|
- tests/modules/ngscheckmate/ncm/**
|
||||||
|
|
||||||
nucmer:
|
nucmer:
|
||||||
- modules/nucmer/**
|
- modules/nucmer/**
|
||||||
- tests/modules/nucmer/**
|
- tests/modules/nucmer/**
|
||||||
|
|
63
tests/modules/ngscheckmate/ncm/main.nf
Normal file
63
tests/modules/ngscheckmate/ncm/main.nf
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { NGSCHECKMATE_NCM as NGSCHECKMATE_NCM_BAM} from '../../../../modules/ngscheckmate/ncm/main.nf'
|
||||||
|
include { NGSCHECKMATE_NCM as NGSCHECKMATE_NCM_VCF} from '../../../../modules/ngscheckmate/ncm/main.nf'
|
||||||
|
|
||||||
|
include { BEDTOOLS_MAKEWINDOWS } from '../../../../modules/bedtools/makewindows/main.nf'
|
||||||
|
|
||||||
|
include { BCFTOOLS_MPILEUP } from '../../../../modules/bcftools/mpileup/main.nf'
|
||||||
|
include { BCFTOOLS_MPILEUP as BCFTOOLS_MPILEUP2 } from '../../../../modules/bcftools/mpileup/main.nf'
|
||||||
|
|
||||||
|
workflow test_ngscheckmate_ncm_bam {
|
||||||
|
input = [ file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam'], checkIfExists: true),
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam_bai'], checkIfExists: true),
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]
|
||||||
|
|
||||||
|
fasta = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
|
||||||
|
|
||||||
|
inputBed = [ [ id:'test'],
|
||||||
|
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)]
|
||||||
|
|
||||||
|
BEDTOOLS_MAKEWINDOWS(inputBed, true).
|
||||||
|
tab.
|
||||||
|
map{it[1]}.
|
||||||
|
view().
|
||||||
|
set{snp_channel}
|
||||||
|
|
||||||
|
NGSCHECKMATE_NCM_BAM(input, snp_channel, fasta)
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow test_ngscheckmate_ncm_vcf {
|
||||||
|
input1 = [ [ id:'test1' ], // meta map
|
||||||
|
[ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ]
|
||||||
|
]
|
||||||
|
|
||||||
|
input2 = [ [ id:'test2' ], // meta map
|
||||||
|
[ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ]
|
||||||
|
]
|
||||||
|
|
||||||
|
fasta = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
|
||||||
|
|
||||||
|
inputBed = [ [ id:'test'],
|
||||||
|
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)]
|
||||||
|
|
||||||
|
BCFTOOLS_MPILEUP ( input1, fasta, false )
|
||||||
|
BCFTOOLS_MPILEUP2 ( input2, fasta, false )
|
||||||
|
|
||||||
|
BCFTOOLS_MPILEUP2.out.vcf.
|
||||||
|
combine( BCFTOOLS_MPILEUP.out.vcf ).
|
||||||
|
map { [ it[1], it[3] ] }.
|
||||||
|
set { vcf_channel }
|
||||||
|
|
||||||
|
BEDTOOLS_MAKEWINDOWS( inputBed, true ).tab.
|
||||||
|
map { it[1] }.
|
||||||
|
view().
|
||||||
|
set { snp_channel }
|
||||||
|
|
||||||
|
NGSCHECKMATE_NCM_VCF(vcf_channel, snp_channel, fasta)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
27
tests/modules/ngscheckmate/ncm/nextflow.config
Normal file
27
tests/modules/ngscheckmate/ncm/nextflow.config
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
process {
|
||||||
|
|
||||||
|
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
||||||
|
|
||||||
|
withName: BEDTOOLS_MAKEWINDOWS {
|
||||||
|
ext.args = '-w 1'
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: BCFTOOLS_MPILEUP {
|
||||||
|
ext.args2 = '--no-version --ploidy 1 --multiallelic-caller'
|
||||||
|
ext.args3 = '--no-version'
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: BCFTOOLS_MPILEUP2 {
|
||||||
|
ext.args2 = '--no-version --ploidy 1 --multiallelic-caller'
|
||||||
|
ext.args3 = '--no-version'
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: NGSCHECKMATE_NCM_VCF {
|
||||||
|
ext.args = '-V'
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: NGSCHECKMATE_NCM_BAM {
|
||||||
|
ext.args = '-B'
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
29
tests/modules/ngscheckmate/ncm/test.yml
Normal file
29
tests/modules/ngscheckmate/ncm/test.yml
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
- name: ngscheckmate ncm test_ngscheckmate_ncm_bam
|
||||||
|
command: nextflow run tests/modules/ngscheckmate/ncm -entry test_ngscheckmate_ncm_bam -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- ngscheckmate/ncm
|
||||||
|
- ngscheckmate
|
||||||
|
files:
|
||||||
|
- path: output/ngscheckmate/output_all.txt
|
||||||
|
md5sum: f71a712c3f6ecf64dd526365212f1b7c
|
||||||
|
- path: output/ngscheckmate/output_corr_matrix.txt
|
||||||
|
md5sum: 6777377aa9ae3d57f841b12896318db0
|
||||||
|
- path: output/ngscheckmate/output_matched.txt
|
||||||
|
md5sum: f71a712c3f6ecf64dd526365212f1b7c
|
||||||
|
- path: output/ngscheckmate/versions.yml
|
||||||
|
md5sum: fbb2bebd65b4f4e1e93c6bf5c08a6829
|
||||||
|
|
||||||
|
- name: ngscheckmate ncm test_ngscheckmate_ncm_vcf
|
||||||
|
command: nextflow run tests/modules/ngscheckmate/ncm -entry test_ngscheckmate_ncm_vcf -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- ngscheckmate/ncm
|
||||||
|
- ngscheckmate
|
||||||
|
files:
|
||||||
|
- path: output/ngscheckmate/output_all.txt
|
||||||
|
md5sum: fd74956dcac279b6f58e82ea73e344f8
|
||||||
|
- path: output/ngscheckmate/output_corr_matrix.txt
|
||||||
|
md5sum: 0c86bdad2721c470fe6be119f291c8e5
|
||||||
|
- path: output/ngscheckmate/output_matched.txt
|
||||||
|
md5sum: fd74956dcac279b6f58e82ea73e344f8
|
||||||
|
- path: output/ngscheckmate/versions.yml
|
||||||
|
md5sum: f06910b83dde194a47870c553cefe193
|
Loading…
Reference in a new issue