Merge branch 'master' into bowtie2-test-fix

2024-12-22 02:58:17 +00:00 · 2022-05-13 12:05:48 +01:00 · 2022-05-13 12:05:48 +01:00 · 9f6a1ae4a4
commit 9f6a1ae4a4
parent d816f2bc5d 4acbf9d356
16 changed files with 277 additions and 91 deletions
--- a/modules/genomescope2/main.nf
+++ b/modules/genomescope2/main.nf
@ -0,0 +1,40 @@
+// Runs genomescope2 on a histogram file and emits the PNG plots, model text
+// and summary text it writes, plus a versions.yml recording the tool version.
+//
+// Input:  tuple of the sample meta map and the histogram file.
+// Output: one [ meta, file ] channel per result file, matched by filename
+//         suffix, and the versions.yml file.
+process GENOMESCOPE2 {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::genomescope2=2.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/genomescope2:2.0--py310r41hdfd78af_5':
+        'quay.io/biocontainers/genomescope2:2.0--py310r41hdfd78af_5' }"
+
+    input:
+    tuple val(meta), path(histogram)
+
+    output:
+    tuple val(meta), path("*_linear_plot.png")            , emit: linear_plot_png
+    tuple val(meta), path("*_transformed_linear_plot.png"), emit: transformed_linear_plot_png
+    tuple val(meta), path("*_log_plot.png")               , emit: log_plot_png
+    tuple val(meta), path("*_transformed_log_plot.png")   , emit: transformed_log_plot_png
+    tuple val(meta), path("*_model.txt")                  , emit: model
+    tuple val(meta), path("*_summary.txt")                , emit: summary
+    path "versions.yml"                                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Extra CLI options and the output name prefix come from task.ext;
+    // the prefix falls back to the sample id. Both are declared with `def`
+    // because nothing outside the script block reads them.
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    genomescope2 \\
+        --input $histogram \\
+        $args \\
+        --output . \\
+        --name_prefix $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    '${task.process}':
+        genomescope2: \$( genomescope2 -v | sed 's/GenomeScope //' )
+    END_VERSIONS
+    """
+}
--- a/modules/genomescope2/meta.yml
+++ b/modules/genomescope2/meta.yml
@ -0,0 +1,67 @@
+name: "genomescope2"
+description: Estimate genome heterozygosity, repeat content, and size from sequencing reads using a k-mer-based statistical approach
+keywords:
+  - "genome size"
+  - "genome heterozygosity"
+  - "repeat content"
+tools:
+  - "genomescope2":
+      description: "Reference-free profiling of polyploid genomes"
+      homepage: "http://qb.cshl.edu/genomescope/genomescope2.0/"
+      documentation: "https://github.com/tbenavi1/genomescope2.0/blob/master/README.md"
+      tool_dev_url: "https://github.com/tbenavi1/genomescope2.0"
+      doi: "https://doi.org/10.1038/s41467-020-14998-3"
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - histogram:
+      type: file
+      description: A k-mer histogram file
+      pattern: "*.hist"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - linear_plot_png:
+      type: file
+      description: A genomescope2 linear plot in PNG format
+      pattern: "*_linear_plot.png"
+  - transformed_linear_plot_png:
+      type: file
+      description: A genomescope2 transformed linear plot in PNG format
+      pattern: "*_transformed_linear_plot.png"
+  - log_plot_png:
+      type: file
+      description: A genomescope2 log plot in PNG format
+      pattern: "*_log_plot.png"
+  - transformed_log_plot_png:
+      type: file
+      description: A genomescope2 transformed log plot in PNG format
+      pattern: "*_transformed_log_plot.png"
+  - model:
+      type: file
+      description: Genomescope2 model fit summary
+      pattern: "*_model.txt"
+  - summary:
+      type: file
+      description: Genomescope2 histogram summary
+      pattern: "*_summary.txt"
+
+authors:
+  - "@mahesh-panchal"
--- a/modules/rtgtools/vcfeval/main.nf
+++ b/modules/rtgtools/vcfeval/main.nf
@ -35,12 +35,13 @@ process RTGTOOLS_VCFEVAL {
    def eval_regions = evaluation_regions ? "--evaluation-regions=$evaluation_regions" : ""
    def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf"
    def query_index = query_vcf_tbi ? "" : "rtg index $query_vcf"
+    def avail_mem = task.memory.toGiga() + "G"

    """
    $truth_index
    $query_index

-    rtg vcfeval \\
+    rtg RTG_MEM=$avail_mem vcfeval \\
        $args \\
        --baseline=$truth_vcf \\
        $bed_regions \\
--- a/subworkflows/nf-core/sra_fastq/main.nf
+++ b/subworkflows/nf-core/sra_fastq/main.nf
@ -1,34 +0,0 @@
-//
-// Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
-//
-
-params.prefetch_options    = [:]
-params.fasterqdump_options = [:]
-
-include { SRATOOLS_PREFETCH    } from '../../../modules/sratools/prefetch/main'    addParams( options: params.prefetch_options    )
-include { SRATOOLS_FASTERQDUMP } from '../../../modules/sratools/fasterqdump/main' addParams( options: params.fasterqdump_options )
-
-workflow SRA_FASTQ {
-    take:
-    sra_ids  // channel: [ val(meta), val(id) ]
-
-    main:
-
-    ch_versions = Channel.empty()
-
-    //
-    // Prefetch sequencing reads in SRA format.
-    //
-    SRATOOLS_PREFETCH ( sra_ids )
-    ch_versions = ch_versions.mix( SRATOOLS_PREFETCH.out.versions.first() )
-
-    //
-    // Convert the SRA format into one or more compressed FASTQ files.
-    //
-    SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra )
-    ch_versions = ch_versions.mix( SRATOOLS_FASTERQDUMP.out.versions.first() )
-
-    emit:
-    reads    = SRATOOLS_FASTERQDUMP.out.reads  // channel: [ val(meta), [ reads ] ]
-    versions = ch_versions                     // channel: [ versions.yml ]
-}
--- a/subworkflows/nf-core/srafastq/main.nf
+++ b/subworkflows/nf-core/srafastq/main.nf
@ -0,0 +1,38 @@
+include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/custom/sratoolsncbisettings/main'
+include { SRATOOLS_PREFETCH           } from '../../../modules/sratools/prefetch/main'
+include { SRATOOLS_FASTERQDUMP        } from '../../../modules/sratools/fasterqdump/main'
+
+/**
+ * Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
+ *
+ * Steps: obtain NCBI settings, prefetch each run in SRA format, then convert
+ * the prefetched data to FASTQ. The same settings channel feeds both
+ * sratools steps.
+ */
+workflow SRAFASTQ {
+    take:
+    sra_ids  // channel: [ val(meta), val(id) ]
+
+    main:
+
+    // Detect existing NCBI user settings or create new ones.
+    CUSTOM_SRATOOLSNCBISETTINGS()
+    def ch_ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings
+
+    // Prefetch sequencing reads in SRA format.
+    SRATOOLS_PREFETCH ( sra_ids, ch_ncbi_settings )
+
+    // Convert the SRA format into one or more compressed FASTQ files.
+    SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ch_ncbi_settings )
+
+    // Gather versions: everything from the settings step, and only the
+    // first versions.yml from each of the per-run sratools steps.
+    ch_versions = Channel.empty().mix(
+        CUSTOM_SRATOOLSNCBISETTINGS.out.versions,
+        SRATOOLS_PREFETCH.out.versions.first(),
+        SRATOOLS_FASTERQDUMP.out.versions.first()
+    )
+
+    emit:
+    reads    = SRATOOLS_FASTERQDUMP.out.reads  // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions                     // channel: [ versions.yml ]
+}
--- a/subworkflows/nf-core/sra_fastq/meta.yml
+++ b/subworkflows/nf-core/sra_fastq/meta.yml
@ -1,11 +1,14 @@
 name: sra_fastq
 description: Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
 keywords:
+  - SRA
+  - NCBI
  - sequencing
  - FASTQ
  - prefetch
-  - dump
+  - fasterq-dump
 modules:
+  - custom/sratoolsncbisettings
  - sratools/prefetch
  - sratools/fasterqdump
 input:
@ -17,7 +20,7 @@ input:
  - id:
      type: string
      description: >
-        SRA identifier.
+        SRA run identifier.
 # TODO Update when we decide on a standard for subworkflow docs
 output:
  - meta:
--- a/subworkflows/nf-core/sra_fastq/nextflow.config
+++ b/subworkflows/nf-core/sra_fastq/nextflow.config
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -839,6 +839,10 @@ genmap/mappability:
  - modules/genmap/mappability/**
  - tests/modules/genmap/mappability/**

+genomescope2:
+  - modules/genomescope2/**
+  - tests/modules/genomescope2/**
+
 genrich:
  - modules/genrich/**
  - tests/modules/genrich/**
@ -1647,14 +1651,14 @@ samtools/bam2fq:
  - modules/samtools/bam2fq/**
  - tests/modules/samtools/bam2fq/**

-samtools/convert:
-  - modules/samtools/convert/**
-  - tests/modules/samtools/convert/**
-
 samtools/collatefastq:
  - modules/samtools/collatefastq/**
  - tests/modules/samtools/collatefastq/**

+samtools/convert:
+  - modules/samtools/convert/**
+  - tests/modules/samtools/convert/**
+
 samtools/depth:
  - modules/samtools/depth/**
  - tests/modules/samtools/depth/**
--- a/tests/modules/genomescope2/main.nf
+++ b/tests/modules/genomescope2/main.nf
@ -0,0 +1,19 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { MERYL_COUNT     } from '../../../modules/meryl/count/main.nf'
+include { MERYL_HISTOGRAM } from '../../../modules/meryl/histogram/main.nf'
+include { GENOMESCOPE2    } from '../../../modules/genomescope2/main.nf'
+
+// Test entry point: feeds one FASTQ file through MERYL_COUNT and
+// MERYL_HISTOGRAM, then runs GENOMESCOPE2 on the resulting histogram.
+workflow test_genomescope2 {
+
+    def meta  = [ id:'test', single_end:false ] // meta map
+    def reads = file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true)
+
+    MERYL_COUNT ( [ meta, reads ] )
+    MERYL_HISTOGRAM ( MERYL_COUNT.out.meryl_db )
+    GENOMESCOPE2 ( MERYL_HISTOGRAM.out.hist )
+}
--- a/tests/modules/genomescope2/nextflow.config
+++ b/tests/modules/genomescope2/nextflow.config
@ -0,0 +1,13 @@
+// Process settings used only by the genomescope2 module test.
+process {
+
+    // Publish under <outdir>/<tool>, where <tool> is the process name after
+    // its last ':' and before its first '_', lower-cased
+    // (e.g. MERYL_COUNT -> "meryl", GENOMESCOPE2 -> "genomescope2").
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    // Extra arguments for every process whose name matches MERYL.*
+    withName: 'MERYL.*' {
+        ext.args = 'k=21'
+    }
+
+    // Extra arguments for GENOMESCOPE2.
+    // NOTE(review): -k presumably has to match the meryl k above, and -p
+    // looks like ploidy — confirm against the genomescope2 CLI help.
+    withName: 'GENOMESCOPE2' {
+        ext.args = '-k 21 -p 1'
+    }
+
+}
--- a/tests/modules/genomescope2/test.yml
+++ b/tests/modules/genomescope2/test.yml
@ -0,0 +1,22 @@
+- name: genomescope2 test_genomescope2
+  command: nextflow run tests/modules/genomescope2 -entry test_genomescope2 -c tests/config/nextflow.config
+  tags:
+    - genomescope2
+  files:
+    - path: output/genomescope2/test_linear_plot.png
+      md5sum: 94c165c5028156299a1d4d05766cac51
+    - path: output/genomescope2/test_log_plot.png
+      md5sum: 9d25ca463d92a0c73a893da7fd3979ba
+    - path: output/genomescope2/test_model.txt
+      md5sum: 3caf62f715f64a2f2b8fdff5d079cb84
+    - path: output/genomescope2/test_summary.txt
+      md5sum: 7452860e2cea99b85f3ff60daeac77f5
+    - path: output/genomescope2/test_transformed_linear_plot.png
+      md5sum: 99a64c1c18d8670f64cb863d4334abbb
+    - path: output/genomescope2/test_transformed_log_plot.png
+      md5sum: b4e029c9fb9987ca33b17392a691c1b4
+    - path: output/genomescope2/versions.yml
+      md5sum: 18afeb26f62a47f680b2bb3e27da9cbc
+    - path: output/meryl/test.hist
+      md5sum: f75362ab9cd70d96621b3690e952085f
+    - path: output/meryl/versions.yml
--- a/tests/subworkflows/nf-core/sra_fastq/main.nf
+++ b/tests/subworkflows/nf-core/sra_fastq/main.nf
@ -1,23 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { SRA_FASTQ } from '../../../../subworkflows/nf-core/sra_fastq/main.nf' addParams( [:] )
-
-workflow test_sra_fastq_single_end {
-    input = [
-        [ id:'test_single_end', single_end:true ], // meta map
-        'SRR13255544'
-    ]
-
-    SRA_FASTQ ( input )
-}
-
-workflow test_sra_fastq_paired_end {
-    input = [
-        [ id:'test_paired_end', single_end:false ], // meta map
-        'SRR11140744'
-    ]
-
-    SRA_FASTQ ( input )
-}
--- a/tests/subworkflows/nf-core/sra_fastq/test.yml
+++ b/tests/subworkflows/nf-core/sra_fastq/test.yml
@ -1,27 +0,0 @@
- name: sra fastq single-end
-  command: nextflow run ./tests/subworkflows/nf-core/sra_fastq -entry test_sra_fastq_single_end -c tests/config/nextflow.config
-  tags:
-    - subworkflows
-    # - subworkflows/sra_fastq
-    # Modules
-    # - sratools
-    # - sratools/prefetch
-    # - sratools/fasterqdump
-  files:
-    - path: output/sratools/SRR13255544.fastq.gz
-      md5sum: 1054c7b71884acdb5eed8a378f18be82
-
- name: sra fastq paired-end
-  command: nextflow run ./tests/subworkflows/nf-core/sra_fastq -entry test_sra_fastq_paired_end -c tests/config/nextflow.config
-  tags:
-    - subworkflows
-    # - subworkflows/sra_fastq
-    # Modules
-    # - sratools
-    # - sratools/prefetch
-    # - sratools/fasterqdump
-  files:
-    - path: output/sratools/SRR11140744_1.fastq.gz
-      md5sum: 193809c784a4ea132ab2a253fa4f55b6
-    - path: output/sratools/SRR11140744_2.fastq.gz
-      md5sum: 3e3b3af3413f50a1685fd7b3f1456d4e
--- a/tests/subworkflows/nf-core/srafastq/main.nf
+++ b/tests/subworkflows/nf-core/srafastq/main.nf
@ -0,0 +1,29 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SRAFASTQ } from '../../../../subworkflows/nf-core/srafastq/main.nf'
+
+// Test entry point: runs SRAFASTQ on a channel carrying two single-end runs.
+workflow test_srafastq_single_end {
+    // Each element is [ meta map, run accession ]
+    def runs = [
+        [ [ id:'test_single_end1', single_end:true ], 'DRR000774' ],
+        [ [ id:'test_single_end2', single_end:true ], 'DRR000775' ]
+    ]
+
+    SRAFASTQ ( Channel.fromList( runs ) )
+}
+
+// Test entry point: runs SRAFASTQ on a single paired-end run.
+workflow test_srafastq_paired_end {
+    def meta = [ id:'test_paired_end', single_end:false ] // meta map
+
+    SRAFASTQ ( [ meta, 'SRR11140744' ] )
+}
--- a/tests/subworkflows/nf-core/srafastq/nextflow.config
+++ b/tests/subworkflows/nf-core/srafastq/nextflow.config
@ -0,0 +1,5 @@
+// Process settings used only by the srafastq subworkflow test.
+process {
+
+    // Publish under <outdir>/<tool>, where <tool> is the process name after
+    // its last ':' and before its first '_', lower-cased
+    // (e.g. SRATOOLS_PREFETCH -> "sratools").
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
--- a/tests/subworkflows/nf-core/srafastq/test.yml
+++ b/tests/subworkflows/nf-core/srafastq/test.yml
@ -0,0 +1,29 @@
+- name: srafastq single-end
+  command: nextflow run ./tests/subworkflows/nf-core/srafastq -entry test_srafastq_single_end -c tests/config/nextflow.config -c tests/subworkflows/nf-core/srafastq/nextflow.config
+  tags:
+    - subworkflows
+    # - subworkflows/srafastq
+    # Modules
+    # - sratools
+    # - sratools/prefetch
+    # - sratools/fasterqdump
+  files:
+    - path: output/sratools/DRR000774.fastq.gz
+      md5sum: 19029a1132115b55277a0d79ee089b49
+    - path: output/sratools/DRR000775.fastq.gz
+      md5sum: 59ff24c86ecb260752668c059c2a1eaf
+
+- name: srafastq paired-end
+  command: nextflow run ./tests/subworkflows/nf-core/srafastq -entry test_srafastq_paired_end -c tests/config/nextflow.config -c tests/subworkflows/nf-core/srafastq/nextflow.config
+  tags:
+    - subworkflows
+    # - subworkflows/srafastq
+    # Modules
+    # - sratools
+    # - sratools/prefetch
+    # - sratools/fasterqdump
+  files:
+    - path: output/sratools/SRR11140744_1.fastq.gz
+      md5sum: 193809c784a4ea132ab2a253fa4f55b6
+    - path: output/sratools/SRR11140744_2.fastq.gz
+      md5sum: 3e3b3af3413f50a1685fd7b3f1456d4e