mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-13 05:13:09 +00:00
New module: NGSCheckMate (#1290)
NGSCheckMate ncm mode, working on BAM and VCF files to check that (human) samples match as expected. Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk> Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>
This commit is contained in:
parent
62da45b0e1
commit
79a9d5e1ea
6 changed files with 236 additions and 0 deletions
49
modules/ngscheckmate/ncm/main.nf
Normal file
49
modules/ngscheckmate/ncm/main.nf
Normal file
|
@ -0,0 +1,49 @@
|
|||
// NGSCHECKMATE_NCM: runs NGSCheckMate's `ncm.py` over every input file staged in
// the task directory and collects the reports it writes there.
//
// Inputs
//   files   - sample files staged side by side; any `*.vcf.gz` among them is
//             decompressed before ncm.py runs
//   snp_bed - BED file handed to ncm.py through `-bed`
//   fasta   - reference exported to ncm.py through the NCM_REF environment variable
//
// Outputs
//   pdf, corr_matrix, matched, all - report files matched by the globs below
//   vcfs     - optional; any `*.vcf` still present when the task finishes
//   versions - versions.yml with the ngscheckmate version string
//
// Extra command-line flags for ncm.py are taken from `task.ext.args`, and the
// output name prefix from `task.ext.prefix` (default "output").
process NGSCHECKMATE_NCM {
    label 'process_low'

    conda (params.enable_conda ? "bioconda::ngscheckmate=1.0.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ngscheckmate:1.0.0--py27r41hdfd78af_3':
        'quay.io/biocontainers/ngscheckmate:1.0.0--py27r41hdfd78af_3' }"

    input:
    path files
    path snp_bed
    path fasta

    output:
    path "*.pdf"            , emit: pdf
    path "*_corr_matrix.txt", emit: corr_matrix
    path "*_matched.txt"    , emit: matched
    path "*_all.txt"        , emit: all
    path "*.vcf"            , emit: vcfs, optional: true
    path "versions.yml"     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "output"
    // True when at least one staged input is a gzipped VCF; rendered into the
    // shell script below as the literal command `true` or `false`.
    def unzip = files.any { it.toString().endsWith(".vcf.gz") }
    """
    if $unzip
    then
        # Shell variables are quoted so that file names containing whitespace or
        # glob characters are decompressed to the intended target.
        for VCFGZ in *.vcf.gz; do
            gunzip -cdf "\$VCFGZ" > "\$( basename "\$VCFGZ" .gz )"
        done
    fi

    NCM_REF="./"${fasta} ncm.py -d . -bed ${snp_bed} -O . -N ${prefix} $args

    if $unzip
    then
        rm -f *.vcf  # clean up decompressed vcfs
    fi

    # The version is scraped from line 7 of the ncm.py help text, with the
    # "Ensuring Sample Identity v" prefix stripped.
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g")
    END_VERSIONS
    """
}
|
64
modules/ngscheckmate/ncm/meta.yml
Normal file
64
modules/ngscheckmate/ncm/meta.yml
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Module metadata for NGSCHECKMATE_NCM: describes the tool, the three input
# channels and every emitted output channel.
name: ngscheckmate_ncm
description: Determining whether sequencing data comes from the same individual by using SNP matching. Designed for humans on vcf or bam files.
keywords:
  - ngscheckmate
  - matching
  - snp
tools:
  - ngscheckmate:
      description: NGSCheckMate is a software package for identifying next generation sequencing (NGS) data files from the same individual, including matching between DNA and RNA.
      homepage: https://github.com/parklab/NGSCheckMate
      documentation: https://github.com/parklab/NGSCheckMate
      tool_dev_url: https://github.com/parklab/NGSCheckMate
      # Bare DOI, without a "doi:/" prefix
      doi: "10.1093/nar/gkx193"
      licence: ["MIT"]

input:
  - files:
      type: file
      description: VCF or BAM files for each sample, in a merged channel (possibly gzipped). BAM files require an index too.
      pattern: "*.{vcf,vcf.gz,bam,bai}"

  - snp_bed:
      type: file
      description: BED file containing the SNPs to analyse
      pattern: "*.{bed}"

  - fasta:
      type: file
      description: fasta file for the genome, only used in the bam mode
      # Reference sequence, so the pattern lists fasta extensions rather than BED
      pattern: "*.{fa,fasta,fna}"

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - pdf:
      type: file
      description: A pdf containing a dendrogram showing how the samples match up
      pattern: "*.{pdf}"

  - corr_matrix:
      type: file
      description: A text file containing the correlation matrix between each sample
      pattern: "*corr_matrix.txt"

  - matched:
      type: file
      description: A txt file containing only the samples that match with each other
      pattern: "*matched.txt"

  - all:
      type: file
      description: A txt file containing all the sample comparisons, whether they match or not
      pattern: "*all.txt"

  - vcfs:
      type: file
      description: If run in bam mode, vcf files for each sample giving the SNP calls
      pattern: "*.vcf"

authors:
  - "@sppearce"
|
|
@ -1145,6 +1145,10 @@ ngmaster:
|
|||
- modules/ngmaster/**
|
||||
- tests/modules/ngmaster/**
|
||||
|
||||
ngscheckmate/ncm:
|
||||
- modules/ngscheckmate/ncm/**
|
||||
- tests/modules/ngscheckmate/ncm/**
|
||||
|
||||
nucmer:
|
||||
- modules/nucmer/**
|
||||
- tests/modules/nucmer/**
|
||||
|
|
63
tests/modules/ngscheckmate/ncm/main.nf
Normal file
63
tests/modules/ngscheckmate/ncm/main.nf
Normal file
|
@ -0,0 +1,63 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { NGSCHECKMATE_NCM as NGSCHECKMATE_NCM_BAM} from '../../../../modules/ngscheckmate/ncm/main.nf'
|
||||
include { NGSCHECKMATE_NCM as NGSCHECKMATE_NCM_VCF} from '../../../../modules/ngscheckmate/ncm/main.nf'
|
||||
|
||||
include { BEDTOOLS_MAKEWINDOWS } from '../../../../modules/bedtools/makewindows/main.nf'
|
||||
|
||||
include { BCFTOOLS_MPILEUP } from '../../../../modules/bcftools/mpileup/main.nf'
|
||||
include { BCFTOOLS_MPILEUP as BCFTOOLS_MPILEUP2 } from '../../../../modules/bcftools/mpileup/main.nf'
|
||||
|
||||
// BAM-mode test: two sarscov2 BAM files (each followed by its index) are passed
// to NGSCHECKMATE_NCM_BAM together with a SNP BED produced by BEDTOOLS_MAKEWINDOWS.
workflow test_ngscheckmate_ncm_bam {
    illumina = params.test_data['sarscov2']['illumina']
    genome   = params.test_data['sarscov2']['genome']

    // Flat list: BAM, BAI, BAM, BAI
    input = [
        file(illumina['test_paired_end_methylated_sorted_bam'], checkIfExists: true),
        file(illumina['test_paired_end_methylated_sorted_bam_bai'], checkIfExists: true),
        file(illumina['test_paired_end_sorted_bam'], checkIfExists: true),
        file(illumina['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    ]

    fasta = [ file(genome['genome_fasta'], checkIfExists: true) ]

    inputBed = [
        [ id:'test' ],
        file(genome['test_bed'], checkIfExists: true)
    ]

    BEDTOOLS_MAKEWINDOWS ( inputBed, true )

    // Keep only the second element of each emitted item (index 1), print it,
    // and bind the resulting channel to snp_channel.
    BEDTOOLS_MAKEWINDOWS.out.tab
        .map { entry -> entry[1] }
        .view()
        .set { snp_channel }

    NGSCHECKMATE_NCM_BAM ( input, snp_channel, fasta )
}
|
||||
|
||||
// VCF-mode test: VCFs are produced by two aliased BCFTOOLS_MPILEUP runs, paired
// into a single list, and handed to NGSCHECKMATE_NCM_VCF with a SNP BED from
// BEDTOOLS_MAKEWINDOWS.
workflow test_ngscheckmate_ncm_vcf {
    illumina = params.test_data['sarscov2']['illumina']
    genome   = params.test_data['sarscov2']['genome']

    // NOTE(review): input1 and input2 reference the same BAM file and differ only
    // in their meta id — confirm this is intended.
    input1 = [
        [ id:'test1' ], // meta map
        [ file(illumina['test_paired_end_sorted_bam'], checkIfExists: true) ]
    ]

    input2 = [
        [ id:'test2' ], // meta map
        [ file(illumina['test_paired_end_sorted_bam'], checkIfExists: true) ]
    ]

    fasta = [ file(genome['genome_fasta'], checkIfExists: true) ]

    inputBed = [
        [ id:'test' ],
        file(genome['test_bed'], checkIfExists: true)
    ]

    BCFTOOLS_MPILEUP  ( input1, fasta, false )
    BCFTOOLS_MPILEUP2 ( input2, fasta, false )

    // combine() concatenates the two emitted items; elements 1 and 3 of the
    // result are kept as a two-element list.
    BCFTOOLS_MPILEUP2.out.vcf
        .combine( BCFTOOLS_MPILEUP.out.vcf )
        .map { joined -> [ joined[1], joined[3] ] }
        .set { vcf_channel }

    BEDTOOLS_MAKEWINDOWS ( inputBed, true )

    // Keep only the second element of each emitted item (index 1), print it,
    // and bind the resulting channel to snp_channel.
    BEDTOOLS_MAKEWINDOWS.out.tab
        .map { entry -> entry[1] }
        .view()
        .set { snp_channel }

    NGSCHECKMATE_NCM_VCF ( vcf_channel, snp_channel, fasta )
}
|
||||
|
||||
|
27
tests/modules/ngscheckmate/ncm/nextflow.config
Normal file
27
tests/modules/ngscheckmate/ncm/nextflow.config
Normal file
|
@ -0,0 +1,27 @@
|
|||
// Per-process settings for the ngscheckmate/ncm module tests.
process {

    // Publish into <outdir>/<tool>, where <tool> is the lower-cased text before
    // the first underscore of the last ':'-separated part of the process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

    withName: 'BEDTOOLS_MAKEWINDOWS' {
        ext.args = '-w 1'
    }

    // Both mpileup aliases receive the same extra arguments.
    withName: 'BCFTOOLS_MPILEUP' {
        ext.args2 = '--no-version --ploidy 1 --multiallelic-caller'
        ext.args3 = '--no-version'
    }

    withName: 'BCFTOOLS_MPILEUP2' {
        ext.args2 = '--no-version --ploidy 1 --multiallelic-caller'
        ext.args3 = '--no-version'
    }

    // Each alias of the module under test gets its own flag.
    withName: 'NGSCHECKMATE_NCM_VCF' {
        ext.args = '-V'
    }

    withName: 'NGSCHECKMATE_NCM_BAM' {
        ext.args = '-B'
    }

}
|
29
tests/modules/ngscheckmate/ncm/test.yml
Normal file
29
tests/modules/ngscheckmate/ncm/test.yml
Normal file
|
@ -0,0 +1,29 @@
|
|||
# Test definitions for the ngscheckmate/ncm module: one entry per workflow,
# each listing the published files and their expected checksums.

# BAM-mode workflow
- name: ngscheckmate ncm test_ngscheckmate_ncm_bam
  command: >-
    nextflow run tests/modules/ngscheckmate/ncm
    -entry test_ngscheckmate_ncm_bam
    -c tests/config/nextflow.config
  tags:
    - ngscheckmate/ncm
    - ngscheckmate
  files:
    - path: output/ngscheckmate/output_all.txt
      md5sum: "f71a712c3f6ecf64dd526365212f1b7c"
    - path: output/ngscheckmate/output_corr_matrix.txt
      md5sum: "6777377aa9ae3d57f841b12896318db0"
    - path: output/ngscheckmate/output_matched.txt
      md5sum: "f71a712c3f6ecf64dd526365212f1b7c"
    - path: output/ngscheckmate/versions.yml
      md5sum: "fbb2bebd65b4f4e1e93c6bf5c08a6829"

# VCF-mode workflow
- name: ngscheckmate ncm test_ngscheckmate_ncm_vcf
  command: >-
    nextflow run tests/modules/ngscheckmate/ncm
    -entry test_ngscheckmate_ncm_vcf
    -c tests/config/nextflow.config
  tags:
    - ngscheckmate/ncm
    - ngscheckmate
  files:
    - path: output/ngscheckmate/output_all.txt
      md5sum: "fd74956dcac279b6f58e82ea73e344f8"
    - path: output/ngscheckmate/output_corr_matrix.txt
      md5sum: "0c86bdad2721c470fe6be119f291c8e5"
    - path: output/ngscheckmate/output_matched.txt
      md5sum: "fd74956dcac279b6f58e82ea73e344f8"
    - path: output/ngscheckmate/versions.yml
      md5sum: "f06910b83dde194a47870c553cefe193"
|
Loading…
Reference in a new issue