Add the genomescope2 module, pass the task memory to `rtg vcfeval` through
RTG_MEM, and rename the sra_fastq subworkflow to srafastq (now also running
custom/sratoolsncbisettings), with the matching test and pytest config changes.

Review adjustments folded into this patch:
  * modules/genomescope2/main.nf: declare `prefix` with `def`, like `args`,
    so it stays local to the task.
  * modules/genomescope2/meta.yml: drop the duplicated `linear_plot_png`
    output entry and make `licence` a real YAML list.
  * subworkflows/nf-core/srafastq/meta.yml: update `name` to follow the
    renamed subworkflow.
Blob ids on the `index` lines are those recorded by the originating commit.

diff --git a/modules/genomescope2/main.nf b/modules/genomescope2/main.nf
new file mode 100644
index 00000000..2ddf9e43
--- /dev/null
+++ b/modules/genomescope2/main.nf
@@ -0,0 +1,40 @@
+process GENOMESCOPE2 {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::genomescope2=2.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/genomescope2:2.0--py310r41hdfd78af_5':
+        'quay.io/biocontainers/genomescope2:2.0--py310r41hdfd78af_5' }"
+
+    input:
+    tuple val(meta), path(histogram)
+
+    output:
+    tuple val(meta), path("*_linear_plot.png")            , emit: linear_plot_png
+    tuple val(meta), path("*_transformed_linear_plot.png"), emit: transformed_linear_plot_png
+    tuple val(meta), path("*_log_plot.png")               , emit: log_plot_png
+    tuple val(meta), path("*_transformed_log_plot.png")   , emit: transformed_log_plot_png
+    tuple val(meta), path("*_model.txt")                  , emit: model
+    tuple val(meta), path("*_summary.txt")                , emit: summary
+    path "versions.yml"                                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    genomescope2 \\
+        --input $histogram \\
+        $args \\
+        --output . \\
+        --name_prefix $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    '${task.process}':
+        genomescope2: \$( genomescope2 -v | sed 's/GenomeScope //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/genomescope2/meta.yml b/modules/genomescope2/meta.yml
new file mode 100644
index 00000000..505daafe
--- /dev/null
+++ b/modules/genomescope2/meta.yml
@@ -0,0 +1,63 @@
+name: "genomescope2"
+description: Estimate genome heterozygosity, repeat content, and size from sequencing reads using a kmer-based statistical approach
+keywords:
+  - "genome size"
+  - "genome heterozygosity"
+  - "repeat content"
+tools:
+  - "genomescope2":
+      description: "Reference-free profiling of polyploid genomes"
+      homepage: "http://qb.cshl.edu/genomescope/genomescope2.0/"
+      documentation: "https://github.com/tbenavi1/genomescope2.0/blob/master/README.md"
+      tool_dev_url: "https://github.com/tbenavi1/genomescope2.0"
+      doi: "https://doi.org/10.1038/s41467-020-14998-3"
+      licence: ["Apache License, Version 2.0 (Apache-2.0)"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - histogram:
+      type: file
+      description: A K-mer histogram file
+      pattern: "*.hist"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - linear_plot_png:
+      type: file
+      description: A genomescope2 linear plot in PNG format
+      pattern: "*_linear_plot.png"
+  - transformed_linear_plot_png:
+      type: file
+      description: A genomescope2 transformed linear plot in PNG format
+      pattern: "*_transformed_linear_plot.png"
+  - log_plot_png:
+      type: file
+      description: A genomescope2 log plot in PNG format
+      pattern: "*_log_plot.png"
+  - transformed_log_plot_png:
+      type: file
+      description: A genomescope2 transformed log plot in PNG format
+      pattern: "*_transformed_log_plot.png"
+  - model:
+      type: file
+      description: Genomescope2 model fit summary
+      pattern: "*_model.txt"
+  - summary:
+      type: file
+      description: Genomescope2 histogram summary
+      pattern: "*_summary.txt"
+
+authors:
+  - "@mahesh-panchal"
diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf
index 1bad4231..27a488f7 100644
--- a/modules/rtgtools/vcfeval/main.nf
+++ b/modules/rtgtools/vcfeval/main.nf
@@ -35,12 +35,13 @@ process RTGTOOLS_VCFEVAL {
     def eval_regions = evaluation_regions ? "--evaluation-regions=$evaluation_regions" : ""
     def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf"
     def query_index = query_vcf_tbi ? "" : "rtg index $query_vcf"
+    def avail_mem = task.memory.toGiga() + "G"
 
     """
     $truth_index
     $query_index
 
-    rtg vcfeval \\
+    rtg RTG_MEM=$avail_mem vcfeval \\
         $args \\
         --baseline=$truth_vcf \\
         $bed_regions \\
diff --git a/subworkflows/nf-core/sra_fastq/main.nf b/subworkflows/nf-core/sra_fastq/main.nf
deleted file mode 100644
index ffa380d9..00000000
--- a/subworkflows/nf-core/sra_fastq/main.nf
+++ /dev/null
@@ -1,34 +0,0 @@
-//
-// Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
-//
-
-params.prefetch_options    = [:]
-params.fasterqdump_options = [:]
-
-include { SRATOOLS_PREFETCH    } from '../../../modules/sratools/prefetch/main'    addParams( options: params.prefetch_options    )
-include { SRATOOLS_FASTERQDUMP } from '../../../modules/sratools/fasterqdump/main' addParams( options: params.fasterqdump_options )
-
-workflow SRA_FASTQ {
-    take:
-    sra_ids  // channel: [ val(meta), val(id) ]
-
-    main:
-
-    ch_versions = Channel.empty()
-
-    //
-    // Prefetch sequencing reads in SRA format.
-    //
-    SRATOOLS_PREFETCH ( sra_ids )
-    ch_versions = ch_versions.mix( SRATOOLS_PREFETCH.out.versions.first() )
-
-    //
-    // Convert the SRA format into one or more compressed FASTQ files.
-    //
-    SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra )
-    ch_versions = ch_versions.mix( SRATOOLS_FASTERQDUMP.out.versions.first() )
-
-    emit:
-    reads    = SRATOOLS_FASTERQDUMP.out.reads  // channel: [ val(meta), [ reads ] ]
-    versions = ch_versions                     // channel: [ versions.yml ]
-}
diff --git a/subworkflows/nf-core/srafastq/main.nf b/subworkflows/nf-core/srafastq/main.nf
new file mode 100644
index 00000000..26e8105e
--- /dev/null
+++ b/subworkflows/nf-core/srafastq/main.nf
@@ -0,0 +1,38 @@
+include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/custom/sratoolsncbisettings/main'
+include { SRATOOLS_PREFETCH           } from '../../../modules/sratools/prefetch/main'
+include { SRATOOLS_FASTERQDUMP        } from '../../../modules/sratools/fasterqdump/main'
+
+/**
+ * Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
+ */
+workflow SRAFASTQ {
+    take:
+    sra_ids  // channel: [ val(meta), val(id) ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    //
+    // Detect existing NCBI user settings or create new ones.
+    //
+    CUSTOM_SRATOOLSNCBISETTINGS()
+    def settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings
+    ch_versions = ch_versions.mix( CUSTOM_SRATOOLSNCBISETTINGS.out.versions )
+
+    //
+    // Prefetch sequencing reads in SRA format.
+    //
+    SRATOOLS_PREFETCH ( sra_ids, settings )
+    ch_versions = ch_versions.mix( SRATOOLS_PREFETCH.out.versions.first() )
+
+    //
+    // Convert the SRA format into one or more compressed FASTQ files.
+    //
+    SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, settings )
+    ch_versions = ch_versions.mix( SRATOOLS_FASTERQDUMP.out.versions.first() )
+
+    emit:
+    reads    = SRATOOLS_FASTERQDUMP.out.reads  // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions                     // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/sra_fastq/meta.yml b/subworkflows/nf-core/srafastq/meta.yml
similarity index 90%
rename from subworkflows/nf-core/sra_fastq/meta.yml
rename to subworkflows/nf-core/srafastq/meta.yml
index 5114bce5..873ccaca 100644
--- a/subworkflows/nf-core/sra_fastq/meta.yml
+++ b/subworkflows/nf-core/srafastq/meta.yml
@@ -1,11 +1,14 @@
-name: sra_fastq
+name: srafastq
 description: Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
 keywords:
+  - SRA
+  - NCBI
   - sequencing
   - FASTQ
   - prefetch
-  - dump
+  - fasterq-dump
 modules:
+  - custom/sratoolsncbisettings
   - sratools/prefetch
   - sratools/fasterqdump
 input:
@@ -17,7 +20,7 @@ input:
   - id:
       type: string
       description: >
-        SRA identifier.
+        SRA run identifier.
 # TODO Update when we decide on a standard for subworkflow docs
 output:
   - meta:
diff --git a/subworkflows/nf-core/sra_fastq/nextflow.config b/subworkflows/nf-core/srafastq/nextflow.config
similarity index 100%
rename from subworkflows/nf-core/sra_fastq/nextflow.config
rename to subworkflows/nf-core/srafastq/nextflow.config
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 428c3652..74cc4e1c 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -839,6 +839,10 @@ genmap/mappability:
   - modules/genmap/mappability/**
   - tests/modules/genmap/mappability/**
 
+genomescope2:
+  - modules/genomescope2/**
+  - tests/modules/genomescope2/**
+
 genrich:
   - modules/genrich/**
   - tests/modules/genrich/**
@@ -1647,14 +1651,14 @@ samtools/bam2fq:
   - modules/samtools/bam2fq/**
   - tests/modules/samtools/bam2fq/**
 
-samtools/convert:
-  - modules/samtools/convert/**
-  - tests/modules/samtools/convert/**
-
 samtools/collatefastq:
   - modules/samtools/collatefastq/**
   - tests/modules/samtools/collatefastq/**
 
+samtools/convert:
+  - modules/samtools/convert/**
+  - tests/modules/samtools/convert/**
+
 samtools/depth:
   - modules/samtools/depth/**
   - tests/modules/samtools/depth/**
diff --git a/tests/modules/genomescope2/main.nf b/tests/modules/genomescope2/main.nf
new file mode 100644
index 00000000..5ceebfd6
--- /dev/null
+++ b/tests/modules/genomescope2/main.nf
@@ -0,0 +1,19 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { MERYL_COUNT     } from '../../../modules/meryl/count/main.nf'
+include { MERYL_HISTOGRAM } from '../../../modules/meryl/histogram/main.nf'
+include { GENOMESCOPE2    } from '../../../modules/genomescope2/main.nf'
+
+workflow test_genomescope2 {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true)
+    ]
+
+    MERYL_COUNT ( input )
+    MERYL_HISTOGRAM ( MERYL_COUNT.out.meryl_db )
+    GENOMESCOPE2 ( MERYL_HISTOGRAM.out.hist )
+}
diff --git a/tests/modules/genomescope2/nextflow.config b/tests/modules/genomescope2/nextflow.config
new file mode 100644
index 00000000..29a0be3a
--- /dev/null
+++ b/tests/modules/genomescope2/nextflow.config
@@ -0,0 +1,13 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: 'MERYL.*' {
+        ext.args = 'k=21'
+    }
+
+    withName: 'GENOMESCOPE2' {
+        ext.args = '-k 21 -p 1'
+    }
+
+}
diff --git a/tests/modules/genomescope2/test.yml b/tests/modules/genomescope2/test.yml
new file mode 100644
index 00000000..5b03f545
--- /dev/null
+++ b/tests/modules/genomescope2/test.yml
@@ -0,0 +1,22 @@
+- name: genomescope2 test_genomescope2
+  command: nextflow run tests/modules/genomescope2 -entry test_genomescope2 -c tests/config/nextflow.config
+  tags:
+    - genomescope2
+  files:
+    - path: output/genomescope2/test_linear_plot.png
+      md5sum: 94c165c5028156299a1d4d05766cac51
+    - path: output/genomescope2/test_log_plot.png
+      md5sum: 9d25ca463d92a0c73a893da7fd3979ba
+    - path: output/genomescope2/test_model.txt
+      md5sum: 3caf62f715f64a2f2b8fdff5d079cb84
+    - path: output/genomescope2/test_summary.txt
+      md5sum: 7452860e2cea99b85f3ff60daeac77f5
+    - path: output/genomescope2/test_transformed_linear_plot.png
+      md5sum: 99a64c1c18d8670f64cb863d4334abbb
+    - path: output/genomescope2/test_transformed_log_plot.png
+      md5sum: b4e029c9fb9987ca33b17392a691c1b4
+    - path: output/genomescope2/versions.yml
+      md5sum: 18afeb26f62a47f680b2bb3e27da9cbc
+    - path: output/meryl/test.hist
+      md5sum: f75362ab9cd70d96621b3690e952085f
+    - path: output/meryl/versions.yml
diff --git a/tests/subworkflows/nf-core/sra_fastq/main.nf b/tests/subworkflows/nf-core/sra_fastq/main.nf
deleted file mode 100644
index 988758f3..00000000
--- a/tests/subworkflows/nf-core/sra_fastq/main.nf
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { SRA_FASTQ } from '../../../../subworkflows/nf-core/sra_fastq/main.nf' addParams( [:] )
-
-workflow test_sra_fastq_single_end {
-    input = [
-        [ id:'test_single_end', single_end:true ], // meta map
-        'SRR13255544'
-    ]
-
-    SRA_FASTQ ( input )
-}
-
-workflow test_sra_fastq_paired_end {
-    input = [
-        [ id:'test_paired_end', single_end:false ], // meta map
-        'SRR11140744'
-    ]
-
-    SRA_FASTQ ( input )
-}
diff --git a/tests/subworkflows/nf-core/sra_fastq/test.yml b/tests/subworkflows/nf-core/sra_fastq/test.yml
deleted file mode 100644
index 4b75431f..00000000
--- a/tests/subworkflows/nf-core/sra_fastq/test.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-- name: sra fastq single-end
-  command: nextflow run ./tests/subworkflows/nf-core/sra_fastq -entry test_sra_fastq_single_end -c tests/config/nextflow.config
-  tags:
-    - subworkflows
-    # - subworkflows/sra_fastq
-    # Modules
-    # - sratools
-    # - sratools/prefetch
-    # - sratools/fasterqdump
-  files:
-    - path: output/sratools/SRR13255544.fastq.gz
-      md5sum: 1054c7b71884acdb5eed8a378f18be82
-
-- name: sra fastq paired-end
-  command: nextflow run ./tests/subworkflows/nf-core/sra_fastq -entry test_sra_fastq_paired_end -c tests/config/nextflow.config
-  tags:
-    - subworkflows
-    # - subworkflows/sra_fastq
-    # Modules
-    # - sratools
-    # - sratools/prefetch
-    # - sratools/fasterqdump
-  files:
-    - path: output/sratools/SRR11140744_1.fastq.gz
-      md5sum: 193809c784a4ea132ab2a253fa4f55b6
-    - path: output/sratools/SRR11140744_2.fastq.gz
-      md5sum: 3e3b3af3413f50a1685fd7b3f1456d4e
diff --git a/tests/subworkflows/nf-core/srafastq/main.nf b/tests/subworkflows/nf-core/srafastq/main.nf
new file mode 100644
index 00000000..82c8f29d
--- /dev/null
+++ b/tests/subworkflows/nf-core/srafastq/main.nf
@@ -0,0 +1,29 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SRAFASTQ } from '../../../../subworkflows/nf-core/srafastq/main.nf'
+
+workflow test_srafastq_single_end {
+    input = Channel.of(
+        [
+            [ id:'test_single_end1', single_end:true ], // meta map
+            'DRR000774'
+        ],
+        [
+            [ id:'test_single_end2', single_end:true ], // meta map
+            'DRR000775'
+        ]
+    )
+
+    SRAFASTQ ( input )
+}
+
+workflow test_srafastq_paired_end {
+    input = [
+        [ id:'test_paired_end', single_end:false ], // meta map
+        'SRR11140744'
+    ]
+
+    SRAFASTQ ( input )
+}
diff --git a/tests/subworkflows/nf-core/srafastq/nextflow.config b/tests/subworkflows/nf-core/srafastq/nextflow.config
new file mode 100644
index 00000000..8730f1c4
--- /dev/null
+++ b/tests/subworkflows/nf-core/srafastq/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/subworkflows/nf-core/srafastq/test.yml b/tests/subworkflows/nf-core/srafastq/test.yml
new file mode 100644
index 00000000..73424171
--- /dev/null
+++ b/tests/subworkflows/nf-core/srafastq/test.yml
@@ -0,0 +1,29 @@
+- name: srafastq single-end
+  command: nextflow run ./tests/subworkflows/nf-core/srafastq -entry test_srafastq_single_end -c tests/config/nextflow.config -c tests/subworkflows/nf-core/srafastq/nextflow.config
+  tags:
+    - subworkflows
+    # - subworkflows/srafastq
+    # Modules
+    # - sratools
+    # - sratools/prefetch
+    # - sratools/fasterqdump
+  files:
+    - path: output/sratools/DRR000774.fastq.gz
+      md5sum: 19029a1132115b55277a0d79ee089b49
+    - path: output/sratools/DRR000775.fastq.gz
+      md5sum: 59ff24c86ecb260752668c059c2a1eaf
+
+- name: srafastq paired-end
+  command: nextflow run ./tests/subworkflows/nf-core/srafastq -entry test_srafastq_paired_end -c tests/config/nextflow.config -c tests/subworkflows/nf-core/srafastq/nextflow.config
+  tags:
+    - subworkflows
+    # - subworkflows/srafastq
+    # Modules
+    # - sratools
+    # - sratools/prefetch
+    # - sratools/fasterqdump
+  files:
+    - path: output/sratools/SRR11140744_1.fastq.gz
+      md5sum: 193809c784a4ea132ab2a253fa4f55b6
+    - path: output/sratools/SRR11140744_2.fastq.gz
+      md5sum: 3e3b3af3413f50a1685fd7b3f1456d4e