diff --git a/README.md b/README.md
index 5551c5a3..c4313f21 100644
--- a/README.md
+++ b/README.md
@@ -540,4 +540,6 @@ nextflow run /path/to/pipeline/ -c /path/to/custom_module.conf
 > Note that the nf-core/tools helper package has a `download` command to download all required pipeline
 > files + singularity containers + institutional configs + modules in one go for you, to make this process easier.
 
+# New test data created for the module sequenzautils/bam2seqz
+The new test data is the output of another module, sequenzautils/gcwiggle (which takes the sarscov2 genome FASTA file as input).
 -->
diff --git a/software/sequenzautils/bam2seqz/functions.nf b/software/sequenzautils/bam2seqz/functions.nf
new file mode 100755
index 00000000..f177f0c8
--- /dev/null
+++ b/software/sequenzautils/bam2seqz/functions.nf
@@ -0,0 +1,60 @@
+/*
+ * -----------------------------------------------------
+ *  Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args          = args.args ?: ''
+    options.args2         = args.args2 ?: ''
+    options.args3         = args.args3 ?: ''
+    options.publish_by_id = args.publish_by_id ?: false
+    options.publish_dir   = args.publish_dir ?: ''
+    options.publish_files = args.publish_files
+    options.suffix        = args.suffix ?: ''
+    return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }  // Remove empty entries
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_id) {
+            path_list.add(args.publish_id)
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
diff --git a/software/sequenzautils/bam2seqz/main.nf b/software/sequenzautils/bam2seqz/main.nf
new file mode 100644
index 00000000..8702fd3e
--- /dev/null
+++ b/software/sequenzautils/bam2seqz/main.nf
@@ -0,0 +1,46 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process SEQUENZAUTILS_BAM2SEQZ {
+    tag "$meta.id"
+    label 'process_medium'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+    conda (params.enable_conda ? "bioconda::sequenza-utils=3.0.0" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/sequenza-utils:3.0.0--py38h6ed170a_2"
+    } else {
+        container "quay.io/biocontainers/sequenza-utils:3.0.0--py38h6ed170a_2"
+    }
+
+    input:
+    tuple val(meta), path(normalbam), path(tumourbam)
+    path fasta
+    path wigfile
+
+    output:
+    tuple val(meta), path("*.seqz.gz"), emit: seqz
+    path "*.version.txt"              , emit: version
+
+    script:
+    def software = getSoftwareName(task.process)
+    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    // NOTE: the executable is 'sequenza-utils' (hyphenated) even though the module is named 'sequenzautils'
+    """
+    sequenza-utils \\
+        bam2seqz \\
+        $options.args \\
+        -n $normalbam \\
+        -t $tumourbam \\
+        --fasta $fasta \\
+        -gc $wigfile \\
+        -o ${prefix}.seqz.gz
+
+    echo \$(sequenza-utils --version 2>&1) | sed 's/^.*sequenza-utils //; s/Using.*\$//' > ${software}.version.txt
+    """
+}
diff --git a/software/sequenzautils/bam2seqz/meta.yml b/software/sequenzautils/bam2seqz/meta.yml
new file mode 100755
index 00000000..171e155c
--- /dev/null
+++ b/software/sequenzautils/bam2seqz/meta.yml
@@ -0,0 +1,50 @@
+name: sequenzautils_bam2seqz
+description: Sequenza-utils bam2seqz processes BAM and Wiggle files to produce a seqz file
+keywords:
+  - bam2seqz
+tools:
+  - sequenzautils:
+      description: Sequenza-utils provides 3 main command line programs to transform common NGS file format - such as FASTA, BAM - to input files for the Sequenza R package. The program - bam2seqz - processes a paired set of BAM/pileup files (tumour and matching normal), and GC-content genome-wide information, to extract the common positions with A and B alleles frequencies.
+      homepage: https://sequenza-utils.readthedocs.io/en/latest/index.html
+      documentation: https://sequenza-utils.readthedocs.io/en/latest/index.html
+      doi: 10.1093/annonc/mdu479
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - normalbam:
+      type: file
+      description: BAM file from the reference/normal sample
+      pattern: "*.{bam}"
+  - tumourbam:
+      type: file
+      description: BAM file from the tumour sample
+      pattern: "*.{bam}"
+  - fasta:
+      type: file
+      description: Reference FASTA file
+      pattern: "*.{fasta}"
+  - wigfile:
+      type: file
+      description: GC content wiggle file
+      pattern: "*.{wig.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.{version.txt}"
+  - seqz:
+      type: file
+      description: Seqz file
+      pattern: "*.{seqz.gz}"
+
+authors:
+  - "@kaurravneet4123"
diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml
index 688c9597..a4c2b906 100644
--- a/tests/config/pytest_software.yml
+++ b/tests/config/pytest_software.yml
@@ -206,9 +206,6 @@ gatk4_createsequencedictionary:
   - software/gatk4/createsequencedictionary/**
   - tests/software/gatk4/createsequencedictionary/**
 
-gatk4_markduplicates:
-  - software/gatk4/markduplicates/**
-  - tests/software/gatk4/markduplicates/**
 gatk4_fastqtosam:
   - software/gatk4/fastqtosam/**
   - tests/software/gatk4/fastqtosam/**
@@ -217,6 +214,10 @@ gatk4_haplotypecaller:
   - software/gatk4/haplotypecaller/**
   - tests/software/gatk4/haplotypecaller/**
 
+gatk4_markduplicates:
+  - software/gatk4/markduplicates/**
+  - tests/software/gatk4/markduplicates/**
+
 gatk4_mergebamalignment:
   - software/gatk4/mergebamalignment/**
   - tests/software/gatk4/mergebamalignment/**
@@ -308,14 +309,14 @@ mosdepth:
   - software/mosdepth/**
   - tests/software/mosdepth/**
 
-msisensor_scan:
-  - software/msisensor/scan/**
-  - tests/software/msisensor/scan/**
-
 msisensor_msi:
   - software/msisensor/msi/**
   - tests/software/msisensor/msi/**
 
+msisensor_scan:
+  - software/msisensor/scan/**
+  - tests/software/msisensor/scan/**
+
 multiqc:
   - software/fastqc/**
   - software/multiqc/**
@@ -429,6 +430,10 @@ sequenza_wiggle:
   - software/sequenza/wiggle/**
   - tests/software/sequenza/wiggle/**
 
+sequenzautils_bam2seqz:
+  - software/sequenzautils/bam2seqz/**
+  - tests/software/sequenzautils/bam2seqz/**
+
 sequenzautils_gcwiggle:
   - software/sequenzautils/gcwiggle/**
   - tests/software/sequenzautils/gcwiggle/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index 69290778..033a5978 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -22,6 +22,8 @@ params {
 
                 kraken2                             = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
                 kraken2_tar_gz                      = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
+
+                test_wig_gz                         = "${test_data_dir}/genomics/sarscov2/genome/gcwiggle/test.wig.gz"
             }
             'illumina' {
                 test_single_end_bam                 = "${test_data_dir}/genomics/sarscov2/illumina/bam/test_single_end.bam"
@@ -67,4 +69,4 @@ params {
         }
     }
 }
-}
\ No newline at end of file
+}
diff --git a/tests/data/genomics/sarscov2/genome/gcwiggle/test.wig.gz b/tests/data/genomics/sarscov2/genome/gcwiggle/test.wig.gz
new file mode 100644
index 00000000..e0f08ca4
Binary files /dev/null and b/tests/data/genomics/sarscov2/genome/gcwiggle/test.wig.gz differ
diff --git a/tests/software/sequenzautils/bam2seqz/main.nf b/tests/software/sequenzautils/bam2seqz/main.nf
new file mode 100755
index 00000000..898093a6
--- /dev/null
+++ b/tests/software/sequenzautils/bam2seqz/main.nf
@@ -0,0 +1,20 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SEQUENZAUTILS_BAM2SEQZ } from '../../../../software/sequenzautils/bam2seqz/main.nf' addParams( options: [:] )
+
+workflow test_sequenzautils_bam2seqz {
+
+    tumourbam = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+    normalbam = file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)
+
+    input = [ [ id:'test' ], // meta map
+              normalbam,     // normal BAM first: order must match `path(normalbam), path(tumourbam)` in the process input
+              tumourbam      // tumour BAM second
+            ]
+    fasta   = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    wigfile = file(params.test_data['sarscov2']['genome']['test_wig_gz'], checkIfExists: true)
+
+    SEQUENZAUTILS_BAM2SEQZ ( input, fasta, wigfile )
+}
diff --git a/tests/software/sequenzautils/bam2seqz/test.yml b/tests/software/sequenzautils/bam2seqz/test.yml
new file mode 100644
index 00000000..c3f0c058
--- /dev/null
+++ b/tests/software/sequenzautils/bam2seqz/test.yml
@@ -0,0 +1,7 @@
+- name: sequenzautils bam2seqz
+  command: nextflow run ./tests/software/sequenzautils/bam2seqz -entry test_sequenzautils_bam2seqz -c tests/config/nextflow.config
+  tags:
+    - sequenzautils
+    - sequenzautils_bam2seqz
+  files:
+    - path: output/sequenzautils/test.seqz.gz