/*
 * MSISENSOR2_MSI
 *
 * Runs `msisensor2 msi` for one sample.
 *
 * Inputs
 *   - tuple: meta map, tumor BAM + index, normal BAM + index, intervals file.
 *            Any of the file slots may be passed as an empty value, in which
 *            case the matching command-line flag is left out.
 *   - scan:   microsatellite scan file, forwarded with `-d` when provided.
 *   - models: model files, staged under `models/` and forwarded as `-M models/`
 *             when provided.
 *
 * Outputs
 *   - msi:          file named `<prefix>`
 *   - distribution: file named `<prefix>_dis`
 *   - somatic:      file named `<prefix>_somatic`
 *   - versions:     `versions.yml` with the msisensor2 version string
 */
process MSISENSOR2_MSI {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::msisensor2=0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/msisensor2:0.1--hd03093a_0':
        'quay.io/biocontainers/msisensor2:0.1--hd03093a_0' }"

    input:
    tuple val(meta), path(tumor_bam), path(tumor_bam_index), path(normal_bam), path(normal_bam_index), path(intervals)
    path scan
    path(models, stageAs: "models/*")

    output:
    tuple val(meta), path("${prefix}")        , emit: msi
    tuple val(meta), path("${prefix}_dis")    , emit: distribution
    tuple val(meta), path("${prefix}_somatic"), emit: somatic
    path "versions.yml"                       , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // `prefix` is deliberately assigned without `def`: the output: declarations
    // above reference it.
    prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ''

    // Each optional input contributes its flag only when a value was supplied;
    // otherwise it collapses to an empty string.
    def tumor_opt     = tumor_bam  ? "-t $tumor_bam"  : ""
    def normal_opt    = normal_bam ? "-n $normal_bam" : ""
    def intervals_opt = intervals  ? "-e $intervals"  : ""
    def scan_opt      = scan       ? "-d $scan"       : ""
    def models_opt    = models     ? "-M models/"     : ""
    """
    msisensor2 msi \\
        -b ${task.cpus} \\
        $args \\
        $models_opt \\
        $scan_opt \\
        $intervals_opt \\
        $tumor_opt \\
        $normal_opt \\
        -o $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        msisensor2: \$(echo \$(msisensor2 2> >(grep Version) | sed 's/Version: v//g'))
    END_VERSIONS
    """
}
/*
 * MSISENSOR2_SCAN
 *
 * Runs `msisensor2 scan` over one or more reference FASTA files and writes a
 * single scan file.
 *
 * Inputs
 *   - fasta:  reference file(s); each one is passed to the tool as `-d <file>`.
 *   - output: name of the scan file to write; falls back to "output.scan"
 *             when empty/null.
 *
 * Outputs
 *   - scan:     the file written to the resolved output name
 *   - versions: `versions.yml` with the msisensor2 version string
 */
process MSISENSOR2_SCAN {
    // Double quotes are required here: a single-quoted Groovy string is not
    // interpolated, so '$fasta' would tag every task with the literal text
    // "$fasta" instead of the input file name(s).
    tag "$fasta"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::msisensor2=0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/msisensor2:0.1--hd03093a_0':
        'quay.io/biocontainers/msisensor2:0.1--hd03093a_0' }"

    input:
    path fasta
    val output

    output:
    path output_path   , emit: scan
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    // One `-d` flag per staged reference file.
    def inputs = fasta.collect{ "-d $it"}.join(" ")
    // Assigned without `def` on purpose: the output: block above refers to it.
    output_path = output ?: "output.scan"
    """
    msisensor2 scan \\
        $args \\
        $inputs \\
        -o $output_path

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        msisensor2: \$(echo \$(msisensor2 2> >(grep Version) | sed 's/Version: v//g'))
    END_VERSIONS
    """
}
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { MSISENSOR2_SCAN } from '../../../../modules/msisensor2/scan/main.nf'
include { MSISENSOR2_MSI  } from '../../../../modules/msisensor2/msi/main.nf'

// Tumor-only run: scan the test genome, then call MSI with the normal BAM,
// normal index and intervals slots left empty and no models.
workflow test_msisensor2_msi_tumor_only {

    tumor_only_sample = [
        [ id:'test' ],
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
        [],
        [],
        []
    ]

    genome = [
        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    ]

    MSISENSOR2_SCAN ( genome, "outputfile" )

    MSISENSOR2_MSI ( tumor_only_sample, MSISENSOR2_SCAN.out.scan, [] )
}


// Tumor/normal run: same as above but with a second BAM (+ index) supplied
// in the normal slots.
workflow test_msisensor2_msi_tumor_normal {

    paired_sample = [
        [ id:'test' ],
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
        []
    ]

    genome = [
        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    ]

    MSISENSOR2_SCAN ( genome, "outputfile" )

    MSISENSOR2_MSI ( paired_sample, MSISENSOR2_SCAN.out.scan, [] )
}

// Tumor-only run driven by model files instead of a scan file: the scan slot
// is empty and the model files are fetched from the msisensor2 repository.
workflow test_msisensor2_msi_tumor_only_ml {

    model_urls = [
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/016a16e12aca2bdba3713a3be76f72cd",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/02d42c2bda19aac304d6e86390c7f328",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/1030c0aa35ca5c263daeae866ad18632",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/15c3f5ec1c020d8f44283e40a2d9b6bb",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/15d6012f9a234b7adbbeecec524aea7d",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/2cf9a58f57e78b88acd86d792fe6a7b3",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/3ba04410c7ccbfc33e8b1b11d8132ae9",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/4431c9dc08be932c460a9e67192e7c57",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/4f5fa7bed97b48093375222d242fc982",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/71e6c0d59ea09d2a7acc566560841e34",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/8144b15900bba7086e86b31a0e1f8cfd",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/9bf6f7a544f369c3262a3a6f72cfdd7b",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/b8a36f2274b33cb0ed932e85cd1ddd5a",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/c08f164ded323a8c2606c408c555d73d",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/ceaa36ddbb76dc6eb6199ed946945788",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/e05d5da7208a924762311eddc4ec96c0",
        "https://github.com/niu-lab/msisensor2/raw/master/test/tmp/models_hg19_17sites/f8a20acf51ccb2b0ce6af42f24a8b5ef"
    ]

    models = Channel.fromPath( model_urls, checkIfExists: true )

    hg19_sample = [
        [ id:'test' ],
        file('https://github.com/niu-lab/msisensor2/raw/master/test/example.tumor.only.hg19.bam', checkIfExists: true),
        file('https://github.com/niu-lab/msisensor2/raw/master/test/example.tumor.only.hg19.bam.bai', checkIfExists: true),
        [],
        [],
        []
    ]

    // collect() gathers all model files into a single list emission for the
    // `models` input.
    MSISENSOR2_MSI ( hg19_sample, [], models.collect() )
}
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { MSISENSOR2_SCAN } from '../../../../modules/msisensor2/scan/main.nf'

// Scan two reference FASTA files in one task and write the result to
// "outputfile".
workflow test_msisensor2_scan {

    genome_fasta  = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    genome2_fasta = file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true)

    MSISENSOR2_SCAN ( [ genome_fasta, genome2_fasta ], "outputfile" )
}
process {

    // Publish into <outdir>/<tool>: take the last ':'-separated component of
    // the process name, keep the part before the first '_', and lower-case it.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}