Add the nf-core modules scramble/clusteridentifier and scramble/clusteranalysis,
together with their meta.yml descriptions, pytest entries, test data paths and tests.

Review notes:
  * SCRAMBLE_CLUSTERANALYSIS now fails early with a clear message when `--eval-dels`
    is requested without a reference fasta (makeblastdb would otherwise be called
    with an empty `-in` argument).
  * The test workflows pass a 3-element tuple [ meta, input, input_index ], matching
    the `tuple val(meta), path(input), path(input_index)` declaration of
    SCRAMBLE_CLUSTERIDENTIFIER (a stray trailing `[]` element was removed).
  * All new files end with a final newline.
  * NOTE(review): indentation was reconstructed from a whitespace-collapsed copy of
    this patch; the context lines of the two modified config files may need
    `git apply --ignore-whitespace`. The `index` blob hashes are carried over from
    the original patch and do not reflect the edits listed above.

diff --git a/modules/scramble/clusteranalysis/main.nf b/modules/scramble/clusteranalysis/main.nf
new file mode 100644
index 00000000..cceceb05
--- /dev/null
+++ b/modules/scramble/clusteranalysis/main.nf
@@ -0,0 +1,61 @@
+process SCRAMBLE_CLUSTERANALYSIS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda (params.enable_conda ? "bioconda::scramble=1.0.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scramble:1.0.1--h779adbc_1':
+        'quay.io/biocontainers/scramble:1.0.1--h779adbc_1' }"
+
+    input:
+    tuple val(meta), path(clusters)
+    path fasta
+    path mei_ref
+
+    output:
+    tuple val(meta), path("*_MEIs.txt")                 , optional:true, emit: meis_tab
+    tuple val(meta), path("*_PredictedDeletions.txt")   , optional:true, emit: dels_tab
+    tuple val(meta), path("*.vcf")                      , optional:true, emit: vcf
+    path "versions.yml"                                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    // `--eval-dels` builds a BLAST database from the reference fasta (see `blastdb` below), so the fasta is mandatory in that mode.
+    // Fail early with a clear message instead of calling makeblastdb with an empty `-in` argument.
+    def eval_dels = args.contains("--eval-dels")
+    if (eval_dels && !fasta) error "SCRAMBLE_CLUSTERANALYSIS: a reference fasta is required when '--eval-dels' is set in task.ext.args"
+
+    // The BLAST database is written next to the staged fasta, only when deletions are evaluated
+    def blastdb = eval_dels ? "makeblastdb -in ${fasta} -parse_seqids -title ${fasta} -dbtype nucl -out ${fasta}" : ""
+    // Without a fasta no `--ref` option is passed to SCRAMble.R
+    def reference = fasta ? "--ref `pwd`/${fasta}" : ""
+
+    // The default file for the MEI reference is a file that's inside the container
+    def mei_reference = mei_ref ? "`pwd`/${mei_ref}" : "/usr/local/share/scramble/resources/MEI_consensus_seqs.fa"
+
+    // Extra versions.yml entry, emitted only when makeblastdb is actually run
+    def blastdb_version = eval_dels ? "makeblastdb: \$(echo \$(makeblastdb -version 2>&1) | head -n 1 | sed 's/^makeblastdb: //; s/+ Package.*\$//')" : ""
+    """
+    ${blastdb}
+
+    Rscript --vanilla /usr/local/share/scramble/bin/SCRAMble.R \\
+        --install-dir /usr/local/share/scramble/bin \\
+        ${args} \\
+        --cluster-file `pwd`/${clusters} \\
+        ${reference} \\
+        --mei-refs ${mei_reference} \\
+        --out-name `pwd`/${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        scramble: ${VERSION}
+        ${blastdb_version}
+    END_VERSIONS
+    """
+}
diff --git a/modules/scramble/clusteranalysis/meta.yml b/modules/scramble/clusteranalysis/meta.yml
new file mode 100644
index 00000000..15adf7a6
--- /dev/null
+++ b/modules/scramble/clusteranalysis/meta.yml
@@ -0,0 +1,58 @@
+name: "scramble_clusteranalysis"
+description: The Cluster Analysis tool of Scramble analyses and interprets the soft-clipped clusters found by `cluster_identifier`
+keywords:
+  - soft-clipped clusters
+  - scramble
+tools:
+  - "scramble":
+      description: "Soft Clipped Read Alignment Mapper"
+      homepage: "https://github.com/GeneDx/scramble"
+      documentation: "https://github.com/GeneDx/scramble"
+      tool_dev_url: "https://github.com/GeneDx/scramble"
+      doi: ""
+      licence: "['CC']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - clusters:
+      type: file
+      description: Tab-delimited text file containing soft-clipped clusters. Has to be generated using scramble/clusteridentifier
+      pattern: "*.clusters.txt"
+  - fasta:
+      type: file
+      description: Optional fasta reference file. This file is needed to create a VCF file and to evaluate predicted deletions.
+      pattern: "*.{fasta,fa}"
+  - mei_ref:
+      type: file
+      description: Optional fasta file containing the MEI reference. This file should only be supplied in special occasions where the default isn't correct
+      pattern: "*.{fasta,fa}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - meis_tab:
+      type: file
+      description: Tab-delimited text file containing MEI calls
+      pattern: "*_MEIs.txt"
+  - dels_tab:
+      type: file
+      description: Tab-delimited text file containing predicted deletions
+      pattern: "*_PredictedDeletions.txt"
+  - vcf:
+      type: file
+      description: A VCF file containing the MEI calls and/or the predicted deletions (depending on the given arguments)
+      pattern: "*.vcf"
+
+authors:
+  - "@nvnieuwk"
diff --git a/modules/scramble/clusteridentifier/main.nf b/modules/scramble/clusteridentifier/main.nf
new file mode 100644
index 00000000..1a90396e
--- /dev/null
+++ b/modules/scramble/clusteridentifier/main.nf
@@ -0,0 +1,50 @@
+process SCRAMBLE_CLUSTERIDENTIFIER {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda (params.enable_conda ? "bioconda::scramble=1.0.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scramble:1.0.1--h779adbc_1':
+        'quay.io/biocontainers/scramble:1.0.1--h779adbc_1' }"
+
+    input:
+    tuple val(meta), path(input), path(input_index)
+    path fasta
+
+    output:
+    tuple val(meta), path("*.clusters.txt") , emit: clusters
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    // The tool does not contain a way to specify the reference file when using CRAM files.
+    // It just looks in the header of the CRAM file where the reference file is located,
+    // but that reference can't always be fetched since most test data is created on
+    // another machine. I had to find another way to specify the reference and I
+    // found that I could create an md5 cache of a specified fasta and supply it to
+    // the REF_PATH environment variable. This way the tool uses the correct reference.
+    // An issue has been made about this: https://github.com/GeneDx/scramble/issues/27
+    // The reference code is a placeholder until this issue has been fixed.
+    // NOTE(review): seq_cache_populate.pl is downloaded from the samtools master branch at run time,
+    // so this code path needs network access and the script version is not pinned.
+    def reference = fasta ? "wget https://raw.githubusercontent.com/samtools/samtools/master/misc/seq_cache_populate.pl && perl seq_cache_populate.pl -root ./md5_ref ${fasta} && export REF_PATH=`pwd`/md5_ref/%2s/%2s/%s" : ""
+    """
+    ${reference}
+
+    cluster_identifier \\
+        ${args} \\
+        ${input} \\
+        > ${prefix}.clusters.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        scramble: ${VERSION}
+    END_VERSIONS
+    """
+}
diff --git a/modules/scramble/clusteridentifier/meta.yml b/modules/scramble/clusteridentifier/meta.yml
new file mode 100644
index 00000000..b1b733fb
--- /dev/null
+++ b/modules/scramble/clusteridentifier/meta.yml
@@ -0,0 +1,51 @@
+name: "scramble_clusteridentifier"
+description: The cluster_identifier tool of Scramble identifies soft clipped clusters
+keywords:
+  - bam
+  - cram
+  - soft-clipped clusters
+tools:
+  - "scramble":
+      description: "Soft Clipped Read Alignment Mapper"
+      homepage: "https://github.com/GeneDx/scramble"
+      documentation: "https://github.com/GeneDx/scramble"
+      tool_dev_url: "https://github.com/GeneDx/scramble"
+      doi: ""
+      licence: "['CC']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: BAM/CRAM file
+      pattern: "*.{bam,cram}"
+  - input_index:
+      type: file
+      description: Index of the BAM/CRAM file
+      pattern: "*.{bai,crai}"
+  - fasta:
+      type: file
+      description: The reference FASTA file (mandatory when using CRAM files)
+      pattern: "*.{fasta,fa}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - clusters:
+      type: file
+      description: Tab-delimited file containing the soft-clipped clusters
+      pattern: "*.clusters.txt"
+
+authors:
+  - "@nvnieuwk"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 6138fc55..cba06839 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2047,6 +2047,14 @@ scoary:
   - modules/scoary/**
   - tests/modules/scoary/**
 
+scramble/clusteranalysis:
+  - modules/scramble/clusteranalysis/**
+  - tests/modules/scramble/clusteranalysis/**
+
+scramble/clusteridentifier:
+  - modules/scramble/clusteridentifier/**
+  - tests/modules/scramble/clusteridentifier/**
+
 seacr/callpeak:
   - modules/seacr/callpeak/**
   - tests/modules/seacr/callpeak/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index 68d39a92..0efc9480 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -368,6 +368,15 @@ params {
                 genemodel2 = "${test_data_dir}/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed"
                 filelist = "${test_data_dir}/genomics/homo_sapiens/pacbio/txt/filelist.txt"
             }
+            'scramble' {
+                fasta     = "${test_data_dir}/genomics/homo_sapiens/scramble/test.fa"
+                fasta_fai = "${test_data_dir}/genomics/homo_sapiens/scramble/test.fa.fai"
+                bam       = "${test_data_dir}/genomics/homo_sapiens/scramble/test.bam"
+                bam_bai   = "${test_data_dir}/genomics/homo_sapiens/scramble/test.bam.bai"
+                cram      = "${test_data_dir}/genomics/homo_sapiens/scramble/test.cram"
+                cram_crai = "${test_data_dir}/genomics/homo_sapiens/scramble/test.cram.crai"
+                bed       = "${test_data_dir}/genomics/homo_sapiens/scramble/test.bed"
+            }
         }
         'bacteroides_fragilis' {
             'genome' {
diff --git a/tests/modules/scramble/clusteranalysis/main.nf b/tests/modules/scramble/clusteranalysis/main.nf
new file mode 100644
index 00000000..d31bac72
--- /dev/null
+++ b/tests/modules/scramble/clusteranalysis/main.nf
@@ -0,0 +1,54 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SCRAMBLE_CLUSTERANALYSIS } from '../../../../modules/scramble/clusteranalysis/main.nf'
+include { SCRAMBLE_CLUSTERIDENTIFIER } from '../../../../modules/scramble/clusteridentifier/main.nf'
+
+// BAM input, no reference fasta and no custom MEI reference
+workflow test_scramble_clusteranalysis {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['scramble']['bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['scramble']['bam_bai'], checkIfExists: true)
+    ]
+
+    fasta = []
+    mei_ref = []
+
+    SCRAMBLE_CLUSTERIDENTIFIER(
+        input,
+        fasta
+    )
+
+    SCRAMBLE_CLUSTERANALYSIS (
+        SCRAMBLE_CLUSTERIDENTIFIER.out.clusters,
+        fasta,
+        mei_ref
+    )
+}
+
+// CRAM input with a reference fasta; nextflow.config adds `--eval-dels` for this workflow
+workflow test_scramble_clusteranalysis_fasta {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['scramble']['cram'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['scramble']['cram_crai'], checkIfExists: true)
+    ]
+
+    fasta = file(params.test_data['homo_sapiens']['scramble']['fasta'], checkIfExists: true)
+    mei_ref = []
+
+    SCRAMBLE_CLUSTERIDENTIFIER(
+        input,
+        fasta
+    )
+
+    SCRAMBLE_CLUSTERANALYSIS (
+        SCRAMBLE_CLUSTERIDENTIFIER.out.clusters,
+        fasta,
+        mei_ref
+    )
+}
diff --git a/tests/modules/scramble/clusteranalysis/nextflow.config b/tests/modules/scramble/clusteranalysis/nextflow.config
new file mode 100644
index 00000000..86897cf4
--- /dev/null
+++ b/tests/modules/scramble/clusteranalysis/nextflow.config
@@ -0,0 +1,12 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: "test_scramble_clusteranalysis:SCRAMBLE_CLUSTERANALYSIS" {
+        ext.args = "--eval-meis"
+    }
+
+    withName: "test_scramble_clusteranalysis_fasta:SCRAMBLE_CLUSTERANALYSIS" {
+        ext.args = "--eval-meis --eval-dels"
+    }
+}
diff --git a/tests/modules/scramble/clusteranalysis/test.yml b/tests/modules/scramble/clusteranalysis/test.yml
new file mode 100644
index 00000000..a0599ea3
--- /dev/null
+++ b/tests/modules/scramble/clusteranalysis/test.yml
@@ -0,0 +1,25 @@
+- name: scramble clusteranalysis test_scramble_clusteranalysis
+  command: nextflow run ./tests/modules/scramble/clusteranalysis -entry test_scramble_clusteranalysis -c ./tests/config/nextflow.config -c ./tests/modules/scramble/clusteranalysis/nextflow.config
+  tags:
+    - scramble/clusteranalysis
+    - scramble
+  files:
+    - path: output/scramble/test.clusters.txt
+      md5sum: 9b2777a44bfbcff8fac1bf67c3985f1f
+    - path: output/scramble/test_MEIs.txt
+      md5sum: a14c40c7e5f3630defde68ae1de51bca
+
+- name: scramble clusteranalysis test_scramble_clusteranalysis_fasta
+  command: nextflow run ./tests/modules/scramble/clusteranalysis -entry test_scramble_clusteranalysis_fasta -c ./tests/config/nextflow.config -c ./tests/modules/scramble/clusteranalysis/nextflow.config
+  tags:
+    - scramble/clusteranalysis
+    - scramble
+  files:
+    - path: output/scramble/test.clusters.txt
+      md5sum: 9b2777a44bfbcff8fac1bf67c3985f1f
+    - path: output/scramble/test.vcf
+      contains: [fileformat=VCFv4.2]
+    - path: output/scramble/test_MEIs.txt
+      md5sum: a14c40c7e5f3630defde68ae1de51bca
+    - path: output/scramble/test_PredictedDeletions.txt
+      md5sum: 1fa0d3d0a58fdf81bd259b3c71774ba8
diff --git a/tests/modules/scramble/clusteridentifier/main.nf b/tests/modules/scramble/clusteridentifier/main.nf
new file mode 100644
index 00000000..6aea7f53
--- /dev/null
+++ b/tests/modules/scramble/clusteridentifier/main.nf
@@ -0,0 +1,33 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SCRAMBLE_CLUSTERIDENTIFIER } from '../../../../modules/scramble/clusteridentifier/main.nf'
+
+// BAM input: no reference fasta is passed
+workflow test_scramble_clusteridentifier_bam {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['scramble']['bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['scramble']['bam_bai'], checkIfExists: true)
+    ]
+
+    fasta = []
+
+    SCRAMBLE_CLUSTERIDENTIFIER ( input, fasta )
+}
+
+// CRAM input: the reference fasta is passed alongside
+workflow test_scramble_clusteridentifier_cram {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['scramble']['cram'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['scramble']['cram_crai'], checkIfExists: true)
+    ]
+
+    fasta = file(params.test_data['homo_sapiens']['scramble']['fasta'], checkIfExists: true)
+
+    SCRAMBLE_CLUSTERIDENTIFIER ( input, fasta )
+}
diff --git a/tests/modules/scramble/clusteridentifier/nextflow.config b/tests/modules/scramble/clusteridentifier/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/scramble/clusteridentifier/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/scramble/clusteridentifier/test.yml b/tests/modules/scramble/clusteridentifier/test.yml
new file mode 100644
index 00000000..c10a802e
--- /dev/null
+++ b/tests/modules/scramble/clusteridentifier/test.yml
@@ -0,0 +1,17 @@
+- name: scramble clusteridentifier test_scramble_clusteridentifier_bam
+  command: nextflow run ./tests/modules/scramble/clusteridentifier -entry test_scramble_clusteridentifier_bam -c ./tests/config/nextflow.config -c ./tests/modules/scramble/clusteridentifier/nextflow.config
+  tags:
+    - scramble/clusteridentifier
+    - scramble
+  files:
+    - path: output/scramble/test.clusters.txt
+      md5sum: 9b2777a44bfbcff8fac1bf67c3985f1f
+
+- name: scramble clusteridentifier test_scramble_clusteridentifier_cram
+  command: nextflow run ./tests/modules/scramble/clusteridentifier -entry test_scramble_clusteridentifier_cram -c ./tests/config/nextflow.config -c ./tests/modules/scramble/clusteridentifier/nextflow.config
+  tags:
+    - scramble/clusteridentifier
+    - scramble
+  files:
+    - path: output/scramble/test.clusters.txt
+      md5sum: 9b2777a44bfbcff8fac1bf67c3985f1f