1976 msisensor2 (#1977)

* msisensor2: Add msisensor2 module

* msisensor2/scan module added

* msisensor2 tests added

Changes:
 - Tests added for msisensor2 using data from the msisensor2 GitHub repo; no
 tests are available from the nf-core test repo.

Relates to #1976

* msisensor2 slight bump in stuff

* msisensor2: Replaced spelling of 'tumour' with 'tumor'

* msisensor2: fix linting, testing and containers

Relates to #1976

Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
This commit is contained in:
Adam Talbot 2022-09-05 12:00:18 +01:00 committed by GitHub
parent 5a30294eec
commit e7845e2472
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 341 additions and 0 deletions

View file

@ -0,0 +1,48 @@
// Runs `msisensor2 msi` for one sample and emits the three result files
// written under the chosen output prefix, plus a versions.yml.
process MSISENSOR2_MSI {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::msisensor2=0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/msisensor2:0.1--hd03093a_0':
        'quay.io/biocontainers/msisensor2:0.1--hd03093a_0' }"

    input:
    // Per-sample tuple; the index files are staged next to their BAMs but are
    // never named on the command line.
    tuple val(meta), path(tumor_bam), path(tumor_bam_index), path(normal_bam), path(normal_bam_index), path(intervals)
    path scan
    // All model files are staged into a local `models/` directory.
    path(models, stageAs: "models/*")

    output:
    tuple val(meta), path("${prefix}")        , emit: msi
    tuple val(meta), path("${prefix}_dis")    , emit: distribution
    tuple val(meta), path("${prefix}_somatic"), emit: somatic
    path "versions.yml"                       , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately not declared with `def`: the output block
    // above refers to it.
    prefix = task.ext.prefix ?: "${meta.id}"

    // Each flag is emitted only when the matching input was supplied
    // (an empty value leaves the flag out).
    def tumor_opt     = tumor_bam  ? "-t $tumor_bam"  : ""
    def normal_opt    = normal_bam ? "-n $normal_bam" : ""
    def intervals_opt = intervals  ? "-e $intervals"  : ""
    def scan_opt      = scan       ? "-d $scan"       : ""
    def models_opt    = models     ? "-M models/"     : ""
    """
    msisensor2 msi \\
        -b ${task.cpus} \\
        $args \\
        $models_opt \\
        $scan_opt \\
        $intervals_opt \\
        $tumor_opt \\
        $normal_opt \\
        -o $prefix
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        msisensor2: \$(echo \$(msisensor2 2> >(grep Version) | sed 's/Version: v//g'))
    END_VERSIONS
    """
}

View file

@ -0,0 +1,59 @@
# Module metadata for MSISENSOR2_MSI. The input list mirrors the process
# signature: the sample tuple entries first, then `scan` and `models`.
name: "msisensor2_msi"
description: msisensor2 detection of MSI regions.
keywords:
  - msi
tools:
  - "msisensor2":
      description: "MSIsensor2 is a novel algorithm based machine learning, featuring a large upgrade in the microsatellite instability (MSI) detection for tumor only sequencing data, including Cell-Free DNA (cfDNA), Formalin-Fixed Paraffin-Embedded(FFPE) and other sample types. The original MSIsensor is specially designed for tumor/normal paired sequencing data."
      homepage: "https://github.com/niu-lab/msisensor2"
      documentation: "https://github.com/niu-lab/msisensor2/blob/master/README.md"
      tool_dev_url: "https://github.com/niu-lab/msisensor2"
      doi: ""
      licence: "GPL-3.0"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - tumor_bam:
      type: file
      description: Tumor BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - tumor_bam_index:
      type: file
      description: Index file for the tumor BAM/CRAM/SAM file
  - normal_bam:
      type: file
      description: Normal BAM/CRAM/SAM file (optional)
      pattern: "*.{bam,cram,sam}"
  - normal_bam_index:
      type: file
      description: Index file for the normal BAM/CRAM/SAM file (optional)
  - intervals:
      type: file
      description: BED file (optional)
      pattern: "*.bed"
  - scan:
      type: file
      description: MSI scan of the reference genome, e.g. the output of msisensor2/scan (optional)
  - models:
      type: file
      description: Folder of MSISensor2 models (available from Github or as a product of msisensor2/scan)
      pattern: "*/*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - msi:
      type: file
      description: MSI classifications as a text file
  - distribution:
      type: file
      description: Read count distributions of MSI regions
      pattern: "*_dis"
  - somatic:
      type: file
      description: Somatic MSI regions detected.
      pattern: "*_somatic"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@adamrtalbot"

View file

@ -0,0 +1,36 @@
// Runs `msisensor2 scan` over the supplied reference FASTA file(s) and emits
// the resulting scan file plus a versions.yml.
process MSISENSOR2_SCAN {
    // Double quotes are required here: a single-quoted Groovy string is not
    // interpolated, so the tag would be the literal text `$fasta`.
    tag "$fasta"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::msisensor2=0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/msisensor2:0.1--hd03093a_0':
        'quay.io/biocontainers/msisensor2:0.1--hd03093a_0' }"

    input:
    // Reference FASTA file(s) to scan.
    path fasta
    // Name of the scan file to write; falls back to "output.scan" when empty.
    val output

    output:
    path output_path   , emit: scan
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // One `-d` flag per FASTA file.
    def inputs = fasta.collect{ "-d $it"}.join(" ")
    // Not declared with `def` on purpose: the output block above refers to it.
    output_path = output ?: "output.scan"
    """
    msisensor2 scan \\
        $args \\
        $inputs \\
        -o $output_path
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        msisensor2: \$(echo \$(msisensor2 2> >(grep Version) | sed 's/Version: v//g'))
    END_VERSIONS
    """
}

View file

@ -0,0 +1,34 @@
# Module metadata for MSISENSOR2_SCAN. Output names match the channel names
# emitted by the process (`scan`, `versions`).
name: "msisensor2_scan"
description: msisensor2 scan of a reference genome for MSI regions.
keywords:
  - msi
tools:
  - "msisensor2":
      description: "MSIsensor2 is a novel algorithm based machine learning, featuring a large upgrade in the microsatellite instability (MSI) detection for tumor only sequencing data, including Cell-Free DNA (cfDNA), Formalin-Fixed Paraffin-Embedded(FFPE) and other sample types. The original MSIsensor is specially designed for tumor/normal paired sequencing data."
      homepage: "https://github.com/niu-lab/msisensor2"
      documentation: "https://github.com/niu-lab/msisensor2/blob/master/README.md"
      tool_dev_url: "https://github.com/niu-lab/msisensor2"
      doi: ""
      licence: "GPL-3.0"

input:
  - fasta:
      type: file
      description: Path to reference genome FASTA file(s).
      pattern: "*.fasta"
  - output:
      type: val
      description: Name of output MSI scan to write.

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - scan:
      type: file
      description: Output scan of MSI regions in FASTA file.
      pattern: "*"

authors:
  - "@adamrtalbot"

View file

@ -1570,6 +1570,14 @@ msisensor/scan:
- modules/msisensor/scan/**
- tests/modules/msisensor/scan/**
# Paths associated with the msisensor2/msi tag: module source and its tests.
msisensor2/msi:
  - modules/msisensor2/msi/**
  - tests/modules/msisensor2/msi/**
# Paths associated with the msisensor2/scan tag: module source and its tests.
msisensor2/scan:
  - modules/msisensor2/scan/**
  - tests/modules/msisensor2/scan/**
msisensorpro/msi_somatic:
- modules/msisensorpro/msi_somatic/**
- tests/modules/msisensorpro/msi_somatic/**

View file

@ -0,0 +1,84 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { MSISENSOR2_SCAN } from '../../../../modules/msisensor2/scan/main.nf'
include { MSISENSOR2_MSI } from '../../../../modules/msisensor2/msi/main.nf'
// Tumor-only run: scan the human test genome, then call MSI on a single BAM
// with no normal sample, no intervals and no models.
workflow test_msisensor2_msi_tumor_only {

    genome_fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    MSISENSOR2_SCAN ( [ genome_fasta ], "outputfile" )

    tumor_bam = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    tumor_bai = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)

    // [ meta, tumor_bam, tumor_bam_index, normal_bam, normal_bam_index, intervals ]
    sample = [ [ id:'test' ], tumor_bam, tumor_bai, [], [], [] ]

    MSISENSOR2_MSI ( sample, MSISENSOR2_SCAN.out.scan, [] )
}
// Paired run: scan the human test genome, then call MSI with both a tumor
// and a normal BAM, without intervals or models.
workflow test_msisensor2_msi_tumor_normal {

    genome_fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    MSISENSOR2_SCAN ( [ genome_fasta ], "outputfile" )

    tumor_bam  = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    tumor_bai  = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    normal_bam = file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
    normal_bai = file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true)

    // [ meta, tumor_bam, tumor_bam_index, normal_bam, normal_bam_index, intervals ]
    sample = [ [ id:'test' ], tumor_bam, tumor_bai, normal_bam, normal_bai, [] ]

    MSISENSOR2_MSI ( sample, MSISENSOR2_SCAN.out.scan, [] )
}
// Tumor-only run driven by model files instead of a scan: the example BAM and
// the hg19 17-site models are fetched from the msisensor2 GitHub repository.
workflow test_msisensor2_msi_tumor_only_ml {

    tumor_bam = file('https://github.com/niu-lab/msisensor2/raw/master/test/example.tumor.only.hg19.bam', checkIfExists: true)
    tumor_bai = file('https://github.com/niu-lab/msisensor2/raw/master/test/example.tumor.only.hg19.bam.bai', checkIfExists: true)

    // [ meta, tumor_bam, tumor_bam_index, normal_bam, normal_bam_index, intervals ]
    sample = [ [ id:'test' ], tumor_bam, tumor_bai, [], [], [] ]

    model_urls = [
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/016a16e12aca2bdba3713a3be76f72cd",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/02d42c2bda19aac304d6e86390c7f328",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/1030c0aa35ca5c263daeae866ad18632",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/15c3f5ec1c020d8f44283e40a2d9b6bb",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/15d6012f9a234b7adbbeecec524aea7d",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/2cf9a58f57e78b88acd86d792fe6a7b3",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/3ba04410c7ccbfc33e8b1b11d8132ae9",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/4431c9dc08be932c460a9e67192e7c57",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/4f5fa7bed97b48093375222d242fc982",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/71e6c0d59ea09d2a7acc566560841e34",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/8144b15900bba7086e86b31a0e1f8cfd",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/9bf6f7a544f369c3262a3a6f72cfdd7b",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/b8a36f2274b33cb0ed932e85cd1ddd5a",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/c08f164ded323a8c2606c408c555d73d",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/ceaa36ddbb76dc6eb6199ed946945788",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/e05d5da7208a924762311eddc4ec96c0",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/f8a20acf51ccb2b0ce6af42f24a8b5ef",
    ]

    // Gather every model file into a single list so the process receives
    // them all in one task.
    model_files = Channel.fromPath( model_urls, checkIfExists: true ).collect()

    MSISENSOR2_MSI ( sample, [], model_files )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish into "<outdir>/<tool>", where <tool> is the lower-cased text
    // before the first underscore of the unqualified process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }
}

View file

@ -0,0 +1,39 @@
# Tumor-only entry point. `outputfile` is the scan written by MSISENSOR2_SCAN;
# `test`, `test_dis` and `test_somatic` are the msi outputs for sample id 'test'.
- name: msisensor2 msi test_msisensor2_msi_tumor_only
  command: nextflow run ./tests/modules/msisensor2/msi -entry test_msisensor2_msi_tumor_only -c ./tests/config/nextflow.config -c ./tests/modules/msisensor2/msi/nextflow.config
  tags:
    - msisensor2
    - msisensor2/msi
  files:
    - path: output/msisensor2/outputfile
      md5sum: b1fa373ada56fd5027b705d5e2faa374
    - path: output/msisensor2/test
      md5sum: a3290f7539dbbf83777e8590156c0e28
    - path: output/msisensor2/test_dis
      md5sum: 236e814ddeb95e21a17b48250f92250d
    # No md5sum is listed for the somatic output.
    - path: output/msisensor2/test_somatic
# Tumor/normal entry point. `outputfile` is the scan written by MSISENSOR2_SCAN;
# `test`, `test_dis` and `test_somatic` are the msi outputs for sample id 'test'.
- name: msisensor2 msi test_msisensor2_msi_tumor_normal
  command: nextflow run ./tests/modules/msisensor2/msi -entry test_msisensor2_msi_tumor_normal -c ./tests/config/nextflow.config -c ./tests/modules/msisensor2/msi/nextflow.config
  tags:
    - msisensor2
    - msisensor2/msi
  files:
    - path: output/msisensor2/outputfile
      md5sum: b1fa373ada56fd5027b705d5e2faa374
    - path: output/msisensor2/test
      md5sum: a3290f7539dbbf83777e8590156c0e28
    - path: output/msisensor2/test_dis
      md5sum: d47b22c13092707ed64645f118e82cc1
    # No md5sum is listed for the somatic output.
    - path: output/msisensor2/test_somatic
# Model-based tumor-only entry point. No scan is run in this workflow, so only
# the three msi outputs for sample id 'test' are listed.
- name: msisensor2 msi test_msisensor2_msi_tumor_only_ml
  command: nextflow run ./tests/modules/msisensor2/msi -entry test_msisensor2_msi_tumor_only_ml -c ./tests/config/nextflow.config -c ./tests/modules/msisensor2/msi/nextflow.config
  tags:
    - msisensor2
    - msisensor2/msi
  files:
    - path: output/msisensor2/test
      md5sum: a3290f7539dbbf83777e8590156c0e28
    - path: output/msisensor2/test_dis
      md5sum: 85205504ea8652ebdee46ac07ee1a8f6
    # No md5sum is listed for the somatic output.
    - path: output/msisensor2/test_somatic

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { MSISENSOR2_SCAN } from '../../../../modules/msisensor2/scan/main.nf'
// Scans two human test genome FASTA files in a single MSISENSOR2_SCAN call,
// writing the result to "outputfile".
workflow test_msisensor2_scan {

    genome_fasta  = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    genome2_fasta = file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true)

    references = [ genome_fasta, genome2_fasta ]

    MSISENSOR2_SCAN ( references, "outputfile" )
}

View file

@ -0,0 +1,5 @@
process {
    // Derive the publish directory from the process name: drop any workflow
    // scope before the last ':', keep the text before the first '_', and
    // lower-case it.
    publishDir = {
        def unqualified = task.process.tokenize(':')[-1]
        def tool_dir    = unqualified.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,8 @@
# Scan entry point: `outputfile` is the name passed to MSISENSOR2_SCAN by the
# test workflow.
- name: msisensor2 scan test_msisensor2_scan
  command: nextflow run ./tests/modules/msisensor2/scan -entry test_msisensor2_scan -c ./tests/config/nextflow.config -c ./tests/modules/msisensor2/scan/nextflow.config
  tags:
    - msisensor2/scan
    - msisensor2
  files:
    - path: output/msisensor2/outputfile
      md5sum: bdc0a576f37bd95a59692f387d083092