Add GeneScopeFK (#1781)

This commit is contained in:
Mahesh Binzer-Panchal 2022-06-14 20:49:06 +02:00 committed by GitHub
parent 8ded6d441a
commit e91e99db30
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 160 additions and 0 deletions

View file

@ -0,0 +1,42 @@
// GENESCOPEFK
// Runs GeneScopeFK.R on a FastK Histex histogram and collects the plots,
// model fit and summary it writes, plus a versions.yml for the tools used.
//
// Input : tuple(meta, fastk_histex_histogram) - meta is the sample map, used for tag and default prefix.
// Output: one tuple(meta, file) channel per generated artefact, and versions.yml.
process GENESCOPEFK {
    tag "$meta.id"
    label 'process_low'

    // This module declares only a container (no conda directive), so fail
    // fast with an explicit message when Conda is enabled.
    if (params.enable_conda) {
        error "Conda environments cannot be used when using the GeneScope tool. Please use docker or singularity containers."
    }
    container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.0'

    input:
    tuple val(meta), path(fastk_histex_histogram)

    output:
    tuple val(meta), path("*_linear_plot.png")            , emit: linear_plot
    tuple val(meta), path("*_log_plot.png")               , emit: log_plot
    tuple val(meta), path("*_model.txt")                  , emit: model
    tuple val(meta), path("*_summary.txt")                , emit: summary
    tuple val(meta), path("*_transformed_linear_plot.png"), emit: transformed_linear_plot
    tuple val(meta), path("*_transformed_log_plot.png")   , emit: transformed_log_plot
    path "versions.yml"                                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output file prefix are configurable
    // per pipeline through task.ext; the prefix defaults to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def GENESCOPE_VERSION = '380815c420f50171f9234a0fd1ff426b39829b91' // Git commit id is used instead of GeneScopeFK.R -v as software is not release versioned.
    // NOTE: the tool entries inside the heredoc must be indented deeper than
    // the process-name key so that they are nested under it in versions.yml.
    """
    GeneScopeFK.R \\
        $args \\
        --input $fastk_histex_histogram \\
        --output . \\
        --name_prefix ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        genescope: $GENESCOPE_VERSION
        r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,63 @@
# Module metadata for GENESCOPEFK: describes the wrapped tool and every
# input/output channel declared by the process.
name: "genescopefk"
description: A derivative of GenomeScope2.0 modified to work with FastK
keywords:
  - k-mer
  - genome profile
  - histogram
tools:
  - "genescopefk":
      description: "A derivative of GenomeScope2.0 modified to work with FastK"
      homepage: "https://github.com/thegenemyers/GENESCOPE.FK"
      documentation: ""
      tool_dev_url: "https://github.com/thegenemyers/GENESCOPE.FK"
      doi: ""
      # licence is a list of strings, one entry per licence
      licence:
        - "https://github.com/thegenemyers/GENESCOPE.FK/blob/main/LICENSE"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastk_histex_histogram:
      type: file
      description: A histogram formatted for GeneScope using the -G parameter from FastK Histex
      pattern: "*.hist"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - linear_plot:
      type: file
      description: A GeneScope linear plot in PNG format
      pattern: "*_linear_plot.png"
  - transformed_linear_plot:
      type: file
      description: A GeneScope transformed linear plot in PNG format
      pattern: "*_transformed_linear_plot.png"
  - log_plot:
      type: file
      description: A GeneScope log plot in PNG format
      pattern: "*_log_plot.png"
  - transformed_log_plot:
      type: file
      description: A GeneScope transformed log plot in PNG format
      pattern: "*_transformed_log_plot.png"
  - model:
      type: file
      description: GeneScope model fit summary
      pattern: "*_model.txt"
  - summary:
      type: file
      description: GeneScope histogram summary
      pattern: "*_summary.txt"

authors:
  - "@mahesh-panchal"

View file

@ -907,6 +907,10 @@ gatk4/variantrecalibrator:
- modules/gatk4/variantrecalibrator/**
- tests/modules/gatk4/variantrecalibrator/**
# genescopefk: path globs covering the module directory and its tests directory
genescopefk:
- modules/genescopefk/**
- tests/modules/genescopefk/**
genmap/index:
- modules/genmap/index/**
- tests/modules/genmap/index/**

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FASTK_FASTK } from '../../../modules/fastk/fastk/main.nf'
include { FASTK_HISTEX } from '../../../modules/fastk/histex/main.nf'
include { GENESCOPEFK } from '../../../modules/genescopefk/main.nf'
// Test entry point: builds a FastK k-mer table from one FASTQ file, converts
// it to a histogram with Histex, and feeds that histogram to GENESCOPEFK.
workflow test_genescopefk {

    // Sample metadata map passed alongside the reads
    def sample_meta = [ id:'test' , single_end: true ]
    // Single-read test FASTQ taken from the shared test-data parameter map
    def sample_reads = file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // Each step consumes the `hist` output of the previous one
    FASTK_FASTK ( [ sample_meta, sample_reads ] )
    FASTK_HISTEX ( FASTK_FASTK.out.hist )
    GENESCOPEFK ( FASTK_HISTEX.out.hist )
}

View file

@ -0,0 +1,15 @@
// Per-process settings for the genescopefk module test.
process {

    // Default publish directory: take the last ':'-separated segment of the
    // process name, keep the part before the first '_', and lower-case it.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Upstream FastK steps only feed GENESCOPEFK here; do not publish them.
    withName: 'FASTK_.*' {
        publishDir = [ enabled: false ]
    }

    // -G makes Histex write the histogram in the format GeneScope expects.
    withName: 'FASTK_HISTEX' {
        ext.args = '-G'
    }

    // Model options handed to GeneScopeFK.R for the test data set.
    withName: 'GENESCOPEFK' {
        ext.args = '--kmer_length 40 --ploidy 1'
    }
}

View file

@ -0,0 +1,17 @@
# Test definition for the genescopefk module: runs the test workflow and
# checks each published output file against a recorded checksum.
- name: genescopefk test_genescopefk
  command: nextflow run ./tests/modules/genescopefk -entry test_genescopefk -c ./tests/config/nextflow.config -c ./tests/modules/genescopefk/nextflow.config
  tags:
    - genescopefk
  files:
    # Checksums are quoted so they are always read as strings.
    - path: output/genescopefk/test_linear_plot.png
      md5sum: "bb221e2e21a6c0d04dcb6ec8f1bf1cd7"
    - path: output/genescopefk/test_log_plot.png
      md5sum: "d71ad0c963775992043c8f3c55d27fc4"
    - path: output/genescopefk/test_model.txt
      md5sum: "49e5a66cf02b53a3b20880bfabef2ee8"
    - path: output/genescopefk/test_summary.txt
      md5sum: "34540dcc2ba4ab9530f8878bbdc0921c"
    - path: output/genescopefk/test_transformed_linear_plot.png
      md5sum: "3c8e3ad315e6c15cdf51c87de277ec1f"
    - path: output/genescopefk/test_transformed_log_plot.png
      md5sum: "90d983c6faa9b8a02a1d26b777feefc6"