mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2025-01-04 13:32:09 -05:00
New module: scramble (#2015)
* added scramble/clusteridentifier * linting * added cluster_analysis * added a comment to the mei ref * added reference comments * linting
This commit is contained in:
parent
e726b1730d
commit
f2264c1052
12 changed files with 373 additions and 0 deletions
53
modules/scramble/clusteranalysis/main.nf
Normal file
53
modules/scramble/clusteranalysis/main.nf
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
// Runs SCRAMble's cluster analysis R script on a soft-clipped cluster file.
//
// Inputs:
//   meta, clusters - sample meta map and the cluster file to analyse
//   fasta          - optional reference FASTA (pass [] to omit); passed as --ref and,
//                    when '--eval-dels' is in ext.args, indexed with makeblastdb first
//   mei_ref        - optional MEI reference FASTA (pass [] to use the container default)
//
// Outputs (all optional except versions):
//   meis_tab  - "*_MEIs.txt"
//   dels_tab  - "*_PredictedDeletions.txt"
//   vcf       - "*.vcf"
//   versions  - versions.yml
process SCRAMBLE_CLUSTERANALYSIS {
    tag "$meta.id"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::scramble=1.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/scramble:1.0.1--h779adbc_1':
        'quay.io/biocontainers/scramble:1.0.1--h779adbc_1' }"

    input:
    tuple val(meta), path(clusters)
    path fasta
    path mei_ref

    output:
    tuple val(meta), path("*_MEIs.txt")                 , optional:true, emit: meis_tab
    tuple val(meta), path("*_PredictedDeletions.txt")   , optional:true, emit: dels_tab
    tuple val(meta), path("*.vcf")                      , optional:true, emit: vcf
    path "versions.yml"                                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // Deletion evaluation builds a BLAST database from the reference FASTA. Without a
    // FASTA the generated command would be `makeblastdb -in  -parse_seqids ...`, which
    // fails with a confusing message, so stop early with an explicit one instead.
    def eval_dels = args.contains("--eval-dels")
    if (eval_dels && !fasta) {
        error "SCRAMBLE_CLUSTERANALYSIS: '--eval-dels' was requested in ext.args but no reference fasta was supplied"
    }

    def blastdb = eval_dels ? "makeblastdb -in ${fasta} -parse_seqids -title ${fasta} -dbtype nucl -out ${fasta}" : ""
    def reference = fasta ? "--ref `pwd`/${fasta}" : ""

    // The default file for the MEI reference is a file that's inside the container
    def mei_reference = mei_ref ? "`pwd`/${mei_ref}" : "/usr/local/share/scramble/resources/MEI_consensus_seqs.fa"

    // makeblastdb is only reported in versions.yml when it is actually run
    def blastdb_version = eval_dels ? "makeblastdb: \$(echo \$(makeblastdb -version 2>&1) | head -n 1 | sed 's/^makeblastdb: //; s/+ Package.*\$//')" : ""
    """
    ${blastdb}

    Rscript --vanilla /usr/local/share/scramble/bin/SCRAMble.R \\
        --install-dir /usr/local/share/scramble/bin \\
        ${args} \\
        --cluster-file `pwd`/${clusters} \\
        ${reference} \\
        --mei-refs ${mei_reference} \\
        --out-name `pwd`/${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        scramble: ${VERSION}
        ${blastdb_version}
    END_VERSIONS
    """
}
|
58
modules/scramble/clusteranalysis/meta.yml
Normal file
58
modules/scramble/clusteranalysis/meta.yml
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
# Module metadata for scramble/clusteranalysis.
name: "scramble_clusteranalysis"
description: The Cluster Analysis tool of Scramble analyses and interprets the soft-clipped clusters found by `cluster_identifier`
keywords:
  - soft-clipped clusters
  - scramble
tools:
  - "scramble":
      description: "Soft Clipped Read Alignment Mapper"
      homepage: "https://github.com/GeneDx/scramble"
      documentation: "https://github.com/GeneDx/scramble"
      tool_dev_url: "https://github.com/GeneDx/scramble"
      doi: ""
      licence: "['CC']"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - clusters:
      type: file
      description: Tab-delimited text file containing soft-clipped clusters. Has to be generated using scramble/clusteridentifier
      # Matches the "*.clusters.txt" output emitted by scramble/clusteridentifier
      pattern: "*.clusters.txt"
  - fasta:
      type: file
      description: Optional fasta reference file. This file is needed to create a VCF file and to evaluate predicted deletions.
      pattern: "*.{fasta,fa}"
  - mei_ref:
      type: file
      description: Optional fasta file containing the MEI reference. This file should only be supplied on special occasions where the default isn't correct
      pattern: "*.{fasta,fa}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - meis_tab:
      type: file
      description: Tab-delimited text file containing MEI calls
      pattern: "*_MEIs.txt"
  - dels_tab:
      type: file
      description: Tab-delimited text file containing predicted deletions
      pattern: "*_PredictedDeletions.txt"
  - vcf:
      type: file
      description: A VCF file containing the MEI calls and/or the predicted deletions (depending on the given arguments)
      pattern: "*.vcf"

authors:
  - "@nvnieuwk"
|
48
modules/scramble/clusteridentifier/main.nf
Normal file
48
modules/scramble/clusteridentifier/main.nf
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
// Runs SCRAMble's `cluster_identifier` on an alignment file and writes the
// soft-clipped clusters it prints on stdout to "<prefix>.clusters.txt".
//
// Inputs:
//   meta, input, input_index - sample meta map, alignment file and its index
//   fasta                    - optional reference FASTA (pass [] to omit)
//
// Outputs:
//   clusters - "*.clusters.txt"
//   versions - versions.yml
process SCRAMBLE_CLUSTERIDENTIFIER {
    tag "${meta.id}"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::scramble=1.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/scramble:1.0.1--h779adbc_1':
        'quay.io/biocontainers/scramble:1.0.1--h779adbc_1' }"

    input:
    tuple val(meta), path(input), path(input_index)
    path fasta

    output:
    tuple val(meta), path("*.clusters.txt") , emit: clusters
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // cluster_identifier has no option for naming the reference of a CRAM file; it
    // relies on the location recorded in the CRAM header, which is often unreachable
    // (e.g. test data produced on another machine). As a workaround, an md5-keyed
    // sequence cache is built from the supplied fasta and exposed through REF_PATH
    // so that the correct reference is picked up.
    // Upstream issue: https://github.com/GeneDx/scramble/issues/27
    // This is a placeholder until that issue has been resolved.
    def ref_cache_setup = ""
    if (fasta) {
        ref_cache_setup = [
            "wget https://raw.githubusercontent.com/samtools/samtools/master/misc/seq_cache_populate.pl",
            "perl seq_cache_populate.pl -root ./md5_ref ${fasta}",
            "export REF_PATH=`pwd`/md5_ref/%2s/%2s/%s"
        ].join(" && ")
    }
    """
    ${ref_cache_setup}

    cluster_identifier \\
        ${args} \\
        ${input} \\
        > ${prefix}.clusters.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        scramble: ${VERSION}
    END_VERSIONS
    """
}
|
51
modules/scramble/clusteridentifier/meta.yml
Normal file
51
modules/scramble/clusteridentifier/meta.yml
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
# Module metadata for scramble/clusteridentifier.
name: scramble_clusteridentifier
description: The cluster_identifier tool of Scramble identifies soft clipped clusters
keywords:
  - bam
  - cram
  - soft-clipped clusters
tools:
  - scramble:
      description: Soft Clipped Read Alignment Mapper
      homepage: https://github.com/GeneDx/scramble
      documentation: https://github.com/GeneDx/scramble
      tool_dev_url: https://github.com/GeneDx/scramble
      doi: ""
      licence: "['CC']"

# Channels consumed by the process
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: BAM/CRAM file
      pattern: "*.{bam,cram}"
  - input_index:
      type: file
      description: Index of the BAM/CRAM file
      pattern: "*.{bai,crai}"
  - fasta:
      type: file
      description: The reference FASTA file (mandatory when using CRAM files)
      pattern: "*.{fasta,fa}"

# Channels emitted by the process
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: versions.yml
  - clusters:
      type: file
      description: Tab-delimited file containing the soft-clipped clusters
      pattern: "*.clusters.txt"

authors:
  - "@nvnieuwk"
|
|
@ -2047,6 +2047,14 @@ scoary:
|
||||||
- modules/scoary/**
|
- modules/scoary/**
|
||||||
- tests/modules/scoary/**
|
- tests/modules/scoary/**
|
||||||
|
|
||||||
|
scramble/clusteranalysis:
|
||||||
|
- modules/scramble/clusteranalysis/**
|
||||||
|
- tests/modules/scramble/clusteranalysis/**
|
||||||
|
|
||||||
|
scramble/clusteridentifier:
|
||||||
|
- modules/scramble/clusteridentifier/**
|
||||||
|
- tests/modules/scramble/clusteridentifier/**
|
||||||
|
|
||||||
seacr/callpeak:
|
seacr/callpeak:
|
||||||
- modules/seacr/callpeak/**
|
- modules/seacr/callpeak/**
|
||||||
- tests/modules/seacr/callpeak/**
|
- tests/modules/seacr/callpeak/**
|
||||||
|
|
|
@ -368,6 +368,15 @@ params {
|
||||||
genemodel2 = "${test_data_dir}/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed"
|
genemodel2 = "${test_data_dir}/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed"
|
||||||
filelist = "${test_data_dir}/genomics/homo_sapiens/pacbio/txt/filelist.txt"
|
filelist = "${test_data_dir}/genomics/homo_sapiens/pacbio/txt/filelist.txt"
|
||||||
}
|
}
|
||||||
|
'scramble' {
|
||||||
|
fasta = "${test_data_dir}/genomics/homo_sapiens/scramble/test.fa"
|
||||||
|
fasta_fai = "${test_data_dir}/genomics/homo_sapiens/scramble/test.fa.fai"
|
||||||
|
bam = "${test_data_dir}/genomics/homo_sapiens/scramble/test.bam"
|
||||||
|
bam_bai = "${test_data_dir}/genomics/homo_sapiens/scramble/test.bam.bai"
|
||||||
|
cram = "${test_data_dir}/genomics/homo_sapiens/scramble/test.cram"
|
||||||
|
cram_crai = "${test_data_dir}/genomics/homo_sapiens/scramble/test.cram.crai"
|
||||||
|
bed = "${test_data_dir}/genomics/homo_sapiens/scramble/test.bed"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
'bacteroides_fragilis' {
|
'bacteroides_fragilis' {
|
||||||
'genome' {
|
'genome' {
|
||||||
|
|
54
tests/modules/scramble/clusteranalysis/main.nf
Normal file
54
tests/modules/scramble/clusteranalysis/main.nf
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { SCRAMBLE_CLUSTERANALYSIS } from '../../../../modules/scramble/clusteranalysis/main.nf'
|
||||||
|
include { SCRAMBLE_CLUSTERIDENTIFIER } from '../../../../modules/scramble/clusteridentifier/main.nf'
|
||||||
|
|
||||||
|
// Chains SCRAMBLE_CLUSTERIDENTIFIER into SCRAMBLE_CLUSTERANALYSIS on the BAM test
// data, without a reference fasta and with the default MEI reference.
workflow test_scramble_clusteranalysis {

    // Exactly three elements, matching the process declaration
    // `tuple val(meta), path(input), path(input_index)`; an extra trailing element
    // causes an input tuple cardinality mismatch.
    input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['scramble']['bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['scramble']['bam_bai'], checkIfExists: true)
    ]

    fasta = []   // no reference
    mei_ref = [] // use the MEI reference bundled in the container

    SCRAMBLE_CLUSTERIDENTIFIER(
        input,
        fasta
    )

    SCRAMBLE_CLUSTERANALYSIS (
        SCRAMBLE_CLUSTERIDENTIFIER.out.clusters,
        fasta,
        mei_ref
    )
}
|
||||||
|
|
||||||
|
// Chains SCRAMBLE_CLUSTERIDENTIFIER into SCRAMBLE_CLUSTERANALYSIS on the CRAM test
// data, supplying the reference fasta to both processes.
workflow test_scramble_clusteranalysis_fasta {

    // Exactly three elements, matching the process declaration
    // `tuple val(meta), path(input), path(input_index)`; an extra trailing element
    // causes an input tuple cardinality mismatch.
    input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['scramble']['cram'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['scramble']['cram_crai'], checkIfExists: true)
    ]

    fasta = file(params.test_data['homo_sapiens']['scramble']['fasta'], checkIfExists: true)
    mei_ref = [] // use the MEI reference bundled in the container

    SCRAMBLE_CLUSTERIDENTIFIER(
        input,
        fasta
    )

    SCRAMBLE_CLUSTERANALYSIS (
        SCRAMBLE_CLUSTERIDENTIFIER.out.clusters,
        fasta,
        mei_ref
    )
}
|
12
tests/modules/scramble/clusteranalysis/nextflow.config
Normal file
12
tests/modules/scramble/clusteranalysis/nextflow.config
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
// Test configuration for the scramble/clusteranalysis module.
process {

    // Publish under a directory named after the first '_'-separated token of the
    // last ':'-separated component of the process name, lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Run without a reference: only evaluate MEIs
    withName: 'test_scramble_clusteranalysis:SCRAMBLE_CLUSTERANALYSIS' {
        ext.args = '--eval-meis'
    }

    // Run with a reference: evaluate MEIs and deletions
    withName: 'test_scramble_clusteranalysis_fasta:SCRAMBLE_CLUSTERANALYSIS' {
        ext.args = '--eval-meis --eval-dels'
    }

}
|
25
tests/modules/scramble/clusteranalysis/test.yml
Normal file
25
tests/modules/scramble/clusteranalysis/test.yml
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
# BAM input, no reference fasta: only the MEI table is expected.
- name: scramble clusteranalysis test_scramble_clusteranalysis
  command: >-
    nextflow run ./tests/modules/scramble/clusteranalysis
    -entry test_scramble_clusteranalysis
    -c ./tests/config/nextflow.config
    -c ./tests/modules/scramble/clusteranalysis/nextflow.config
  tags:
    - scramble/clusteranalysis
    - scramble
  files:
    - path: output/scramble/test.clusters.txt
      md5sum: "9b2777a44bfbcff8fac1bf67c3985f1f"
    - path: output/scramble/test_MEIs.txt
      md5sum: "a14c40c7e5f3630defde68ae1de51bca"

# CRAM input with reference fasta: VCF and predicted deletions are expected too.
- name: scramble clusteranalysis test_scramble_clusteranalysis_fasta
  command: >-
    nextflow run ./tests/modules/scramble/clusteranalysis
    -entry test_scramble_clusteranalysis_fasta
    -c ./tests/config/nextflow.config
    -c ./tests/modules/scramble/clusteranalysis/nextflow.config
  tags:
    - scramble/clusteranalysis
    - scramble
  files:
    - path: output/scramble/test.clusters.txt
      md5sum: "9b2777a44bfbcff8fac1bf67c3985f1f"
    - path: output/scramble/test.vcf
      contains:
        - fileformat=VCFv4.2
    - path: output/scramble/test_MEIs.txt
      md5sum: "a14c40c7e5f3630defde68ae1de51bca"
    - path: output/scramble/test_PredictedDeletions.txt
      md5sum: "1fa0d3d0a58fdf81bd259b3c71774ba8"
|
33
tests/modules/scramble/clusteridentifier/main.nf
Normal file
33
tests/modules/scramble/clusteridentifier/main.nf
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { SCRAMBLE_CLUSTERIDENTIFIER } from '../../../../modules/scramble/clusteridentifier/main.nf'
|
||||||
|
|
||||||
|
// Runs SCRAMBLE_CLUSTERIDENTIFIER on the BAM test data without a reference fasta.
workflow test_scramble_clusteridentifier_bam {

    // Exactly three elements, matching the process declaration
    // `tuple val(meta), path(input), path(input_index)`; an extra trailing element
    // causes an input tuple cardinality mismatch.
    input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['scramble']['bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['scramble']['bam_bai'], checkIfExists: true)
    ]

    fasta = [] // no reference

    SCRAMBLE_CLUSTERIDENTIFIER ( input, fasta )
}
|
||||||
|
|
||||||
|
// Runs SCRAMBLE_CLUSTERIDENTIFIER on the CRAM test data with the reference fasta.
workflow test_scramble_clusteridentifier_cram {

    // Exactly three elements, matching the process declaration
    // `tuple val(meta), path(input), path(input_index)`; an extra trailing element
    // causes an input tuple cardinality mismatch.
    input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['scramble']['cram'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['scramble']['cram_crai'], checkIfExists: true)
    ]

    fasta = file(params.test_data['homo_sapiens']['scramble']['fasta'], checkIfExists: true)

    SCRAMBLE_CLUSTERIDENTIFIER ( input, fasta )
}
|
5
tests/modules/scramble/clusteridentifier/nextflow.config
Normal file
5
tests/modules/scramble/clusteridentifier/nextflow.config
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
// Test configuration for the scramble/clusteridentifier module.
process {

    // Publish under a directory named after the first '_'-separated token of the
    // last ':'-separated component of the process name, lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

}
|
17
tests/modules/scramble/clusteridentifier/test.yml
Normal file
17
tests/modules/scramble/clusteridentifier/test.yml
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# BAM input without a reference fasta.
- name: scramble clusteridentifier test_scramble_clusteridentifier_bam
  command: >-
    nextflow run ./tests/modules/scramble/clusteridentifier
    -entry test_scramble_clusteridentifier_bam
    -c ./tests/config/nextflow.config
    -c ./tests/modules/scramble/clusteridentifier/nextflow.config
  tags:
    - scramble/clusteridentifier
    - scramble
  files:
    - path: output/scramble/test.clusters.txt
      md5sum: "9b2777a44bfbcff8fac1bf67c3985f1f"

# CRAM input with the reference fasta; the same cluster file checksum is expected.
- name: scramble clusteridentifier test_scramble_clusteridentifier_cram
  command: >-
    nextflow run ./tests/modules/scramble/clusteridentifier
    -entry test_scramble_clusteridentifier_cram
    -c ./tests/config/nextflow.config
    -c ./tests/modules/scramble/clusteridentifier/nextflow.config
  tags:
    - scramble/clusteridentifier
    - scramble
  files:
    - path: output/scramble/test.clusters.txt
      md5sum: "9b2777a44bfbcff8fac1bf67c3985f1f"
|
Loading…
Reference in a new issue