# Patch summary: adds an nf-core DSL2 module that runs `hifiasm`, its metadata file,
# two test workflows with their expected outputs, and registers test data and a pytest tag.
# NOTE(review): this copy was rebuilt from a whitespace-collapsed rendering of the patch;
# indentation, column alignment and blank context lines are reconstructed, not recovered
# -- verify against the original commit before applying.
#
# modules/hifiasm/functions.nf (new file): shared helpers used by the module.
#   getSoftwareName - takes the last ':'-separated token of the process name, keeps the part
#                     before the first '_', and lower-cases it.
#   initOptions     - builds a Map with defaults for args/args2/args3/publish_by_meta/publish_dir/suffix;
#                     publish_files is copied as-is with no default.
#   getPathFromList - drops blank entries, strips whitespace and leading/trailing '/', joins with '/'.
#   saveFiles       - builds the publish path for a file. Nothing is returned for names ending in
#                     '.version.txt', when publish_files is a Map with no matching key, or when
#                     publish_files is a non-null non-Map value.
# NOTE(review): in getPathFromList the safe navigation in `item?.trim().isEmpty()` guards only trim();
# a null list entry would presumably still fail on isEmpty() -- confirm. The HIFIASM process in this
# patch always passes publish_dir, so the list built by saveFiles is not null there.
diff --git a/modules/hifiasm/functions.nf b/modules/hifiasm/functions.nf
new file mode 100644
index 00000000..da9da093
--- /dev/null
+++ b/modules/hifiasm/functions.nf
@@ -0,0 +1,68 @@
+//
+// Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions  = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_meta) {
+            def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+            for (key in key_list) {
+                if (args.meta && key instanceof String) {
+                    def path = key
+                    if (args.meta.containsKey(key)) {
+                        path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                    }
+                    path = path instanceof String ? path : ''
+                    path_list.add(path)
+                }
+            }
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
# modules/hifiasm/main.nf (new file): defines process HIFIASM.
#   Inputs: a (meta, reads) tuple, two k-mer dump paths, and the use_parental_kmers flag.
#   The output prefix is meta.id, with options.suffix appended when it is set; hifiasm is
#   called with `-o ${prefix}.asm`.
#   The two script branches differ only in the `-1`/`-2` options, which are passed when the
#   flag is true. Both branches write `${software}.version.txt` from `hifiasm --version`.
# NOTE(review): the optional `*.asm.p_ctg.gfa` / `*.asm.a_ctg.gfa` outputs are not listed in either
# test of tests/modules/hifiasm/test.yml -- confirm which hifiasm settings produce those names.
diff --git a/modules/hifiasm/main.nf b/modules/hifiasm/main.nf
new file mode 100644
index 00000000..5d005ee4
--- /dev/null
+++ b/modules/hifiasm/main.nf
@@ -0,0 +1,65 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process HIFIASM {
+    tag "$meta.id"
+    label 'process_high'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::hifiasm=0.15.4" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/hifiasm:0.15.4--h2e03b76_0"
+    } else {
+        container "quay.io/biocontainers/hifiasm:0.15.4--h2e03b76_0"
+    }
+
+    input:
+    tuple val(meta), path(reads)
+    path  paternal_kmer_dump
+    path  maternal_kmer_dump
+    val   use_parental_kmers
+
+    output:
+    tuple val(meta), path("*.r_utg.gfa")       , emit: raw_unitigs
+    tuple val(meta), path("*.ec.bin")          , emit: corrected_reads
+    tuple val(meta), path("*.ovlp.source.bin") , emit: source_overlaps
+    tuple val(meta), path("*.ovlp.reverse.bin"), emit: reverse_overlaps
+    tuple val(meta), path("*.p_utg.gfa")       , emit: processed_unitigs, optional: true
+    tuple val(meta), path("*.asm.p_ctg.gfa")   , emit: primary_contigs  , optional: true
+    tuple val(meta), path("*.asm.a_ctg.gfa")   , emit: alternate_contigs, optional: true
+    tuple val(meta), path("*.hap1.p_ctg.gfa")  , emit: paternal_contigs , optional: true
+    tuple val(meta), path("*.hap2.p_ctg.gfa")  , emit: maternal_contigs , optional: true
+    path  "*.version.txt"                      , emit: version
+
+    script:
+    def software = getSoftwareName(task.process)
+    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    if (use_parental_kmers) {
+        """
+        hifiasm \\
+            $options.args \\
+            -o ${prefix}.asm \\
+            -t $task.cpus \\
+            -1 $paternal_kmer_dump \\
+            -2 $maternal_kmer_dump \\
+            $reads
+
+        echo \$(hifiasm --version 2>&1) > ${software}.version.txt
+        """
+    } else { // Phasing with Hi-C data is not supported yet
+        """
+        hifiasm \\
+            $options.args \\
+            -o ${prefix}.asm \\
+            -t $task.cpus \\
+            $reads
+
+        echo \$(hifiasm --version 2>&1) > ${software}.version.txt
+        """
+    }
+}
# modules/hifiasm/meta.yml (new file): module description, keywords, tool links, inputs and outputs.
# NOTE(review): the `reads` pattern is "*.{fastq}" while the module tests pass test_hifi.fastq.gz --
# confirm whether gzipped input should be listed in the pattern.
# NOTE(review): `use_parental_kmers` is typed `logical`; check that this matches the type names used
# by the other modules' meta.yml files.
diff --git a/modules/hifiasm/meta.yml b/modules/hifiasm/meta.yml
new file mode 100644
index 00000000..dc414b93
--- /dev/null
+++ b/modules/hifiasm/meta.yml
@@ -0,0 +1,87 @@
+name: hifiasm
+description: Whole-genome assembly using PacBio HiFi reads
+keywords:
+  - genome assembly
+  - haplotype resolution
+  - phasing
+  - PacBio
+  - HiFi
+  - long reads
+tools:
+  - hifiasm:
+      description: Haplotype-resolved assembler for accurate HiFi reads
+      homepage: https://github.com/chhylp123/hifiasm
+      documentation: https://github.com/chhylp123/hifiasm
+      tool_dev_url: https://github.com/chhylp123/hifiasm
+      doi: "10.1038/s41592-020-01056-5"
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: FASTQ file with PacBio HiFi reads
+      pattern: "*.{fastq}"
+  - paternal_kmer_dump:
+      type: file
+      description: Yak kmer dump file for paternal reads (can be used for haplotype resolution). It can have an arbitrary extension.
+  - maternal_kmer_dump:
+      type: file
+      description: Yak kmer dump file for maternal reads (can be used for haplotype resolution). It can have an arbitrary extension.
+  - use_parental_kmers:
+      type: logical
+      description: A flag (true or false) signalling if the module should use the paternal and maternal kmer dumps.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.{version.txt}"
+  - raw_unitigs:
+      type: file
+      description: Raw unitigs
+      pattern: "*.r_utg.gfa"
+  - processed_unitigs:
+      type: file
+      description: Processed unitigs
+      pattern: "*.p_utg.gfa"
+  - primary_contigs:
+      type: file
+      description: Primary contigs
+      pattern: "*.asm.p_ctg.gfa"
+  - alternate_contigs:
+      type: file
+      description: Alternative contigs
+      pattern: "*.asm.a_ctg.gfa"
+  - paternal_contigs:
+      type: file
+      description: Paternal contigs
+      pattern: "*.hap1.p_ctg.gfa"
+  - maternal_contigs:
+      type: file
+      description: Maternal contigs
+      pattern: "*.hap2.p_ctg.gfa"
+  - corrected_reads:
+      type: file
+      description: Corrected reads
+      pattern: "*.ec.bin"
+  - source_overlaps:
+      type: file
+      description: Source overlaps
+      pattern: "*.ovlp.source.bin"
+  - reverse_overlaps:
+      type: file
+      description: Reverse overlaps
+      pattern: "*.ovlp.reverse.bin"
+
+authors:
+  - "@sidorov-si"
# tests/config/pytest_modules.yml: registers a `hifiasm` entry listing the module and test paths.
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 92748eca..d56a8695 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -346,6 +346,10 @@ gunzip:
   - modules/gunzip/**
   - tests/modules/gunzip/**
 
+hifiasm:
+  - modules/hifiasm/**
+  - tests/modules/hifiasm/**
+
 hisat2/align:
   - modules/hisat2/align/**
   - modules/hisat2/build/**
# tests/config/test_data.config: adds two yak dump entries (test_yak, test2_yak) to the group that
# holds the 10x FASTQ entries, closes that group, and opens a new 'pacbio' group with one HiFi FASTQ.
# The test workflows read these as ['homo_sapiens']['illumina'][...] and ['homo_sapiens']['pacbio'][...].
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index 314a2329..7b6e3cfd 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -158,6 +158,12 @@ params {
 
                 test_10x_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_1.fastq.gz"
                 test_10x_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_2.fastq.gz"
+
+                test_yak = "${test_data_dir}/genomics/homo_sapiens/illumina/yak/test.yak"
+                test2_yak = "${test_data_dir}/genomics/homo_sapiens/illumina/yak/test2.yak"
+            }
+            'pacbio' {
+                test_hifi_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz"
             }
         }
     }
# tests/modules/hifiasm/main.nf (new file): two entry workflows; HIFIASM is included with
# options args '-f0'.
#   test_hifiasm_hifi_only           - passes [] for both k-mer dumps and false for the flag.
#   test_hifiasm_with_parental_reads - passes the two yak dumps and true for the flag.
diff --git a/tests/modules/hifiasm/main.nf b/tests/modules/hifiasm/main.nf
new file mode 100644
index 00000000..aeb64fb2
--- /dev/null
+++ b/tests/modules/hifiasm/main.nf
@@ -0,0 +1,31 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HIFIASM } from '../../../modules/hifiasm/main.nf' addParams( options: [args:'-f0'] )
+
+/*
+ * Test with long reads only
+ */
+workflow test_hifiasm_hifi_only {
+    input = [
+        [ id:'test' ], // meta map
+        [ file(params.test_data['homo_sapiens']['pacbio']['test_hifi_fastq_gz'], checkIfExists: true) ]
+    ]
+
+    HIFIASM ( input, [], [], false )
+}
+
+/*
+ * Test with parental reads for phasing
+ */
+workflow test_hifiasm_with_parental_reads {
+    input = [
+        [ id:'test' ], // meta map
+        [ file(params.test_data['homo_sapiens']['pacbio']['test_hifi_fastq_gz'], checkIfExists: true) ]
+    ]
+    paternal_kmer_dump = file(params.test_data['homo_sapiens']['illumina']['test_yak'], checkIfExists: true)
+    maternal_kmer_dump = file(params.test_data['homo_sapiens']['illumina']['test2_yak'], checkIfExists: true)
+
+    HIFIASM ( input, paternal_kmer_dump, maternal_kmer_dump, true )
+}
# tests/modules/hifiasm/test.yml (new file): expected output files per entry workflow.
#   The HiFi-only run expects `test.asm.bp.*` GFA names; the parental run expects `test.asm.dip.*`.
#   The three .bin files are listed without an md5sum, so only their paths are given.
# NOTE(review): p_utg and r_utg carry the same md5sum in both tests -- presumably the two graphs are
# identical on this small dataset; confirm.
diff --git a/tests/modules/hifiasm/test.yml b/tests/modules/hifiasm/test.yml
new file mode 100644
index 00000000..47d9e38f
--- /dev/null
+++ b/tests/modules/hifiasm/test.yml
@@ -0,0 +1,33 @@
+- name: hifiasm test_hifiasm_hifi_only
+  command: nextflow run tests/modules/hifiasm -entry test_hifiasm_hifi_only -c tests/config/nextflow.config
+  tags:
+    - hifiasm
+  files:
+    - path: output/hifiasm/test.asm.bp.hap1.p_ctg.gfa
+      md5sum: 73b0c1b01d445db91c269034b8660501
+    - path: output/hifiasm/test.asm.bp.hap2.p_ctg.gfa
+      md5sum: 76847b7835185ab92611a820467c0066
+    - path: output/hifiasm/test.asm.bp.p_utg.gfa
+      md5sum: 97446fb95de214835c36b10a90838486
+    - path: output/hifiasm/test.asm.bp.r_utg.gfa
+      md5sum: 97446fb95de214835c36b10a90838486
+    - path: output/hifiasm/test.asm.ec.bin
+    - path: output/hifiasm/test.asm.ovlp.reverse.bin
+    - path: output/hifiasm/test.asm.ovlp.source.bin
+
+- name: hifiasm test_hifiasm_with_parental_reads
+  command: nextflow run tests/modules/hifiasm -entry test_hifiasm_with_parental_reads -c tests/config/nextflow.config
+  tags:
+    - hifiasm
+  files:
+    - path: output/hifiasm/test.asm.dip.hap1.p_ctg.gfa
+      md5sum: 16907b1aea2081884deb9d039dd14038
+    - path: output/hifiasm/test.asm.dip.hap2.p_ctg.gfa
+      md5sum: d283479bf72a31d0cc34f880535d0bd4
+    - path: output/hifiasm/test.asm.dip.p_utg.gfa
+      md5sum: 97446fb95de214835c36b10a90838486
+    - path: output/hifiasm/test.asm.dip.r_utg.gfa
+      md5sum: 97446fb95de214835c36b10a90838486
+    - path: output/hifiasm/test.asm.ec.bin
+    - path: output/hifiasm/test.asm.ovlp.reverse.bin
+    - path: output/hifiasm/test.asm.ovlp.source.bin