Merge branch 'master' into bowtie2-test-fix

2024-12-22 11:08:17 +00:00 · 2022-05-13 12:05:48 +01:00 · 2022-05-13 12:05:48 +01:00 · 9f6a1ae4a4
commit 9f6a1ae4a4
parent d816f2bc5d 4acbf9d356
16 changed files with 277 additions and 91 deletions
--- a/modules/genomescope2/main.nf
+++ b/modules/genomescope2/main.nf
@ -0,0 +1,40 @@
 // Runs genomescope2 on a k-mer histogram and collects the plots, model fit and
 // summary it writes into the task directory.
 //
 // Input:  tuple of the sample meta map and the k-mer histogram file.
 // Output: four PNG plots, the model and summary text files (each paired with
 //         the meta map) and a versions.yml recording the tool version.
 process GENOMESCOPE2 {
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "bioconda::genomescope2=2.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/genomescope2:2.0--py310r41hdfd78af_5':
        'quay.io/biocontainers/genomescope2:2.0--py310r41hdfd78af_5' }"
    input:
    tuple val(meta), path(histogram)
    output:
    // Outputs are matched by file-name suffix, so they do not depend on the prefix variable.
    tuple val(meta), path("*_linear_plot.png")            , emit: linear_plot_png
    tuple val(meta), path("*_transformed_linear_plot.png"), emit: transformed_linear_plot_png
    tuple val(meta), path("*_log_plot.png")               , emit: log_plot_png
    tuple val(meta), path("*_transformed_log_plot.png")   , emit: transformed_log_plot_png
    tuple val(meta), path("*_model.txt")                  , emit: model
    tuple val(meta), path("*_summary.txt")                , emit: summary
    path "versions.yml"                                   , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Optional extra command-line arguments supplied through the process configuration.
    def args = task.ext.args ?: ''
    // Declared with `def` so the value stays local to this task; nothing outside
    // the script block reads it, so it does not need wider scope.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    genomescope2 \\
        --input $histogram \\
        $args \\
        --output . \\
        --name_prefix $prefix
    cat <<-END_VERSIONS > versions.yml
    '${task.process}':
        genomescope2: \$( genomescope2 -v | sed 's/GenomeScope //' )
    END_VERSIONS
    """
 }
--- a/modules/genomescope2/meta.yml
+++ b/modules/genomescope2/meta.yml
@ -0,0 +1,67 @@
 name: "genomescope2"
 description: Estimate genome heterozygosity, repeat content, and size from sequencing reads using a kmer-based statistical approach
 keywords:
  - "genome size"
  - "genome heterozygosity"
  - "repeat content"
 tools:
  - "genomescope2":
      description: "Reference-free profiling of polyploid genomes"
      homepage: "http://qb.cshl.edu/genomescope/genomescope2.0/"
      documentation: "https://github.com/tbenavi1/genomescope2.0/blob/master/README.md"
      tool_dev_url: "https://github.com/tbenavi1/genomescope2.0"
      doi: "https://doi.org/10.1038/s41467-020-14998-3"
      licence: "['Apache License, Version 2.0 (Apache-2.0)']"
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - histogram:
      type: file
      description: A K-mer histogram file
      pattern: "*.hist"
 output:
  # One entry per output channel, each listed exactly once.
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - linear_plot_png:
      type: file
      description: A genomescope2 linear plot in PNG format
      pattern: "*_linear_plot.png"
  - transformed_linear_plot_png:
      type: file
      description: A genomescope2 transformed linear plot in PNG format
      pattern: "*_transformed_linear_plot.png"
  - log_plot_png:
      type: file
      description: A genomescope2 log plot in PNG format
      pattern: "*_log_plot.png"
  - transformed_log_plot_png:
      type: file
      description: A genomescope2 transformed log plot in PNG format
      pattern: "*_transformed_log_plot.png"
  - model:
      type: file
      description: Genomescope2 model fit summary
      pattern: "*_model.txt"
  - summary:
      type: file
      description: Genomescope2 histogram summary
      pattern: "*_summary.txt"
 authors:
  - "@mahesh-panchal"
--- a/modules/rtgtools/vcfeval/main.nf
+++ b/modules/rtgtools/vcfeval/main.nf
@ -35,12 +35,13 @@ process RTGTOOLS_VCFEVAL {
    def eval_regions = evaluation_regions ? "--evaluation-regions=$evaluation_regions" : ""
    def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf"
    def query_index = query_vcf_tbi ? "" : "rtg index $query_vcf"
    def avail_mem = task.memory.toGiga() + "G"
    """
    $truth_index
    $query_index
-    rtg vcfeval \\
+    rtg RTG_MEM=$avail_mem vcfeval \\
        $args \\
        --baseline=$truth_vcf \\
        $bed_regions \\
--- a/subworkflows/nf-core/sra_fastq/main.nf
+++ b/subworkflows/nf-core/sra_fastq/main.nf
@ -1,34 +0,0 @@
 //
 // Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
 //
 params.prefetch_options    = [:]
 params.fasterqdump_options = [:]
 include { SRATOOLS_PREFETCH    } from '../../../modules/sratools/prefetch/main'    addParams( options: params.prefetch_options    )
 include { SRATOOLS_FASTERQDUMP } from '../../../modules/sratools/fasterqdump/main' addParams( options: params.fasterqdump_options )
 // Subworkflow that chains SRATOOLS_PREFETCH and SRATOOLS_FASTERQDUMP.
 //
 // take: sra_ids  - items of [ meta, id ]
 // emit: reads    - the `reads` output of SRATOOLS_FASTERQDUMP
 //       versions - versions.yml files collected from both processes
 workflow SRA_FASTQ {
    take:
    sra_ids  // channel: [ val(meta), val(id) ]
    main:
    // Accumulator for the versions.yml files emitted by the processes below.
    ch_versions = Channel.empty()
    //
    // Prefetch sequencing reads in SRA format.
    //
    SRATOOLS_PREFETCH ( sra_ids )
    // Only the first versions.yml emitted by the process is kept.
    ch_versions = ch_versions.mix( SRATOOLS_PREFETCH.out.versions.first() )
    //
    // Convert the SRA format into one or more compressed FASTQ files.
    //
    SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra )
    // Only the first versions.yml emitted by the process is kept.
    ch_versions = ch_versions.mix( SRATOOLS_FASTERQDUMP.out.versions.first() )
    emit:
    reads    = SRATOOLS_FASTERQDUMP.out.reads  // channel: [ val(meta), [ reads ] ]
    versions = ch_versions                     // channel: [ versions.yml ]
 }
--- a/subworkflows/nf-core/srafastq/main.nf
+++ b/subworkflows/nf-core/srafastq/main.nf
@ -0,0 +1,38 @@
 include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/custom/sratoolsncbisettings/main'
 include { SRATOOLS_PREFETCH           } from '../../../modules/sratools/prefetch/main'
 include { SRATOOLS_FASTERQDUMP        } from '../../../modules/sratools/fasterqdump/main'
 /**
 * Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
 */
 workflow SRAFASTQ {
    take:
    sra_ids  // channel: [ val(meta), val(id) ]
    main:
    // Step 1: detect existing NCBI user settings or create new ones.
    CUSTOM_SRATOOLSNCBISETTINGS()
    def ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings
    // Step 2: prefetch the sequencing reads in SRA format.
    SRATOOLS_PREFETCH ( sra_ids, ncbi_settings )
    // Step 3: convert the SRA format into one or more compressed FASTQ files.
    SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ncbi_settings )
    // Gather the version files; for the two sratools processes only the first
    // emitted versions.yml is kept.
    ch_versions = Channel.empty().mix(
        CUSTOM_SRATOOLSNCBISETTINGS.out.versions,
        SRATOOLS_PREFETCH.out.versions.first(),
        SRATOOLS_FASTERQDUMP.out.versions.first()
    )
    emit:
    reads    = SRATOOLS_FASTERQDUMP.out.reads  // channel: [ val(meta), [ reads ] ]
    versions = ch_versions                     // channel: [ versions.yml ]
 }
--- a/subworkflows/nf-core/sra_fastq/meta.yml
+++ b/subworkflows/nf-core/sra_fastq/meta.yml
@ -1,11 +1,14 @@
 name: sra_fastq
 description: Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
 keywords:
  - SRA
  - NCBI
  - sequencing
  - FASTQ
  - prefetch
-  - dump
+  - fasterq-dump
 modules:
  - custom/sratoolsncbisettings
  - sratools/prefetch
  - sratools/fasterqdump
 input:
@ -17,7 +20,7 @@ input:
  - id:
      type: string
      description: >
-        SRA identifier.
+        SRA run identifier.
 # TODO Update when we decide on a standard for subworkflow docs
 output:
  - meta:
--- a/subworkflows/nf-core/sra_fastq/nextflow.config
+++ b/subworkflows/nf-core/sra_fastq/nextflow.config
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -839,6 +839,10 @@ genmap/mappability:
  - modules/genmap/mappability/**
  - tests/modules/genmap/mappability/**
 genomescope2:
  - modules/genomescope2/**
  - tests/modules/genomescope2/**
 genrich:
  - modules/genrich/**
  - tests/modules/genrich/**
@ -1647,14 +1651,14 @@ samtools/bam2fq:
  - modules/samtools/bam2fq/**
  - tests/modules/samtools/bam2fq/**
 samtools/convert:
  - modules/samtools/convert/**
  - tests/modules/samtools/convert/**
 samtools/collatefastq:
  - modules/samtools/collatefastq/**
  - tests/modules/samtools/collatefastq/**
 samtools/convert:
  - modules/samtools/convert/**
  - tests/modules/samtools/convert/**
 samtools/depth:
  - modules/samtools/depth/**
  - tests/modules/samtools/depth/**
--- a/tests/modules/genomescope2/main.nf
+++ b/tests/modules/genomescope2/main.nf
@ -0,0 +1,19 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { MERYL_COUNT     } from '../../../modules/meryl/count/main.nf'
 include { MERYL_HISTOGRAM } from '../../../modules/meryl/histogram/main.nf'
 include { GENOMESCOPE2    } from '../../../modules/genomescope2/main.nf'
 workflow test_genomescope2 {
    // FASTQ file taken from the bacteroides_fragilis illumina test data; fails early if it is missing.
    reads = file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true)
    // [ meta map, reads ] pair handed to the first process of the chain.
    sample = [ [ id:'test', single_end:false ], reads ]
    // Count -> histogram -> genomescope2, each step consuming the previous step's output.
    MERYL_COUNT ( sample )
    MERYL_HISTOGRAM ( MERYL_COUNT.out.meryl_db )
    GENOMESCOPE2 ( MERYL_HISTOGRAM.out.hist )
 }
--- a/tests/modules/genomescope2/nextflow.config
+++ b/tests/modules/genomescope2/nextflow.config
@ -0,0 +1,13 @@
 // Process-scope settings for the genomescope2 module test.
 process {
    // Publish into "${params.outdir}/<tool>", where <tool> is the last ':'-separated
    // segment of the process name, cut at its first '_' and lower-cased
    // (e.g. MERYL_COUNT -> meryl).
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
    // Applies to every process whose name matches MERYL.*
    // (presumably a k-mer size of 21 — confirm against the meryl modules).
    withName: 'MERYL.*' {
        ext.args = 'k=21'
    }
    // Read by the GENOMESCOPE2 process as task.ext.args
    // (presumably k-mer length 21 and ploidy 1 — confirm against the genomescope2 CLI).
    withName: 'GENOMESCOPE2' {
        ext.args = '-k 21 -p 1'
    }
 }
--- a/tests/modules/genomescope2/test.yml
+++ b/tests/modules/genomescope2/test.yml
@ -0,0 +1,22 @@
 - name: genomescope2 test_genomescope2
  command: nextflow run tests/modules/genomescope2 -entry test_genomescope2 -c tests/config/nextflow.config
  tags:
    - genomescope2
  files:
    - path: output/genomescope2/test_linear_plot.png
      md5sum: 94c165c5028156299a1d4d05766cac51
    - path: output/genomescope2/test_log_plot.png
      md5sum: 9d25ca463d92a0c73a893da7fd3979ba
    - path: output/genomescope2/test_model.txt
      md5sum: 3caf62f715f64a2f2b8fdff5d079cb84
    - path: output/genomescope2/test_summary.txt
      md5sum: 7452860e2cea99b85f3ff60daeac77f5
    - path: output/genomescope2/test_transformed_linear_plot.png
      md5sum: 99a64c1c18d8670f64cb863d4334abbb
    - path: output/genomescope2/test_transformed_log_plot.png
      md5sum: b4e029c9fb9987ca33b17392a691c1b4
    - path: output/genomescope2/versions.yml
      md5sum: 18afeb26f62a47f680b2bb3e27da9cbc
    - path: output/meryl/test.hist
      md5sum: f75362ab9cd70d96621b3690e952085f
    - path: output/meryl/versions.yml
--- a/tests/subworkflows/nf-core/sra_fastq/main.nf
+++ b/tests/subworkflows/nf-core/sra_fastq/main.nf
@ -1,23 +0,0 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { SRA_FASTQ } from '../../../../subworkflows/nf-core/sra_fastq/main.nf' addParams( [:] )
 // Test entry point: runs SRA_FASTQ on one [ meta, accession ] pair whose
 // meta map sets single_end:true.
 workflow test_sra_fastq_single_end {
    input = [
        [ id:'test_single_end', single_end:true ], // meta map
        'SRR13255544'
    ]
    SRA_FASTQ ( input )
 }
 // Test entry point: runs SRA_FASTQ on one [ meta, accession ] pair whose
 // meta map sets single_end:false.
 workflow test_sra_fastq_paired_end {
    input = [
        [ id:'test_paired_end', single_end:false ], // meta map
        'SRR11140744'
    ]
    SRA_FASTQ ( input )
 }
--- a/tests/subworkflows/nf-core/sra_fastq/test.yml
+++ b/tests/subworkflows/nf-core/sra_fastq/test.yml
@ -1,27 +0,0 @@
 - name: sra fastq single-end
  command: nextflow run ./tests/subworkflows/nf-core/sra_fastq -entry test_sra_fastq_single_end -c tests/config/nextflow.config
  tags:
    - subworkflows
    # - subworkflows/sra_fastq
    # Modules
    # - sratools
    # - sratools/prefetch
    # - sratools/fasterqdump
  files:
    - path: output/sratools/SRR13255544.fastq.gz
      md5sum: 1054c7b71884acdb5eed8a378f18be82
 - name: sra fastq paired-end
  command: nextflow run ./tests/subworkflows/nf-core/sra_fastq -entry test_sra_fastq_paired_end -c tests/config/nextflow.config
  tags:
    - subworkflows
    # - subworkflows/sra_fastq
    # Modules
    # - sratools
    # - sratools/prefetch
    # - sratools/fasterqdump
  files:
    - path: output/sratools/SRR11140744_1.fastq.gz
      md5sum: 193809c784a4ea132ab2a253fa4f55b6
    - path: output/sratools/SRR11140744_2.fastq.gz
      md5sum: 3e3b3af3413f50a1685fd7b3f1456d4e
--- a/tests/subworkflows/nf-core/srafastq/main.nf
+++ b/tests/subworkflows/nf-core/srafastq/main.nf
@ -0,0 +1,29 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { SRAFASTQ } from '../../../../subworkflows/nf-core/srafastq/main.nf'
 workflow test_srafastq_single_end {
    // Two accessions, each emitted as its own [ meta, id ] channel item with single_end:true.
    runs = Channel.of(
        [ [ id:'test_single_end1', single_end:true ], 'DRR000774' ],
        [ [ id:'test_single_end2', single_end:true ], 'DRR000775' ]
    )
    SRAFASTQ ( runs )
 }
 // Test entry point: runs SRAFASTQ on one [ meta, accession ] pair whose
 // meta map sets single_end:false.
 workflow test_srafastq_paired_end {
    // Wrap the sample in a queue channel, as the single-end test does. A bare list
    // would be converted to a value channel, for which the `.first()` calls inside
    // SRAFASTQ are redundant.
    input = Channel.of(
        [
            [ id:'test_paired_end', single_end:false ], // meta map
            'SRR11140744'
        ]
    )
    SRAFASTQ ( input )
 }
--- a/tests/subworkflows/nf-core/srafastq/nextflow.config
+++ b/tests/subworkflows/nf-core/srafastq/nextflow.config
@ -0,0 +1,5 @@
 // Process-scope settings for the srafastq subworkflow test.
 process {
    // Publish into "${params.outdir}/<tool>", where <tool> is the last ':'-separated
    // segment of the process name, cut at its first '_' and lower-cased
    // (e.g. SRATOOLS_PREFETCH -> sratools).
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
 }
--- a/tests/subworkflows/nf-core/srafastq/test.yml
+++ b/tests/subworkflows/nf-core/srafastq/test.yml
@ -0,0 +1,29 @@
 - name: srafastq single-end
  command: nextflow run ./tests/subworkflows/nf-core/srafastq -entry test_srafastq_single_end -c tests/config/nextflow.config -c tests/subworkflows/nf-core/srafastq/nextflow.config
  tags:
    - subworkflows
    # - subworkflows/srafastq
    # Modules
    # - sratools
    # - sratools/prefetch
    # - sratools/fasterqdump
  files:
    - path: output/sratools/DRR000774.fastq.gz
      md5sum: 19029a1132115b55277a0d79ee089b49
    - path: output/sratools/DRR000775.fastq.gz
      md5sum: 59ff24c86ecb260752668c059c2a1eaf
 - name: srafastq paired-end
  command: nextflow run ./tests/subworkflows/nf-core/srafastq -entry test_srafastq_paired_end -c tests/config/nextflow.config -c tests/subworkflows/nf-core/srafastq/nextflow.config
  tags:
    - subworkflows
    # - subworkflows/srafastq
    # Modules
    # - sratools
    # - sratools/prefetch
    # - sratools/fasterqdump
  files:
    - path: output/sratools/SRR11140744_1.fastq.gz
      md5sum: 193809c784a4ea132ab2a253fa4f55b6
    - path: output/sratools/SRR11140744_2.fastq.gz
      md5sum: 3e3b3af3413f50a1685fd7b3f1456d4e