Merge branch 'add_compression_to_bam2fq' of https://github.com/Genomic-Medicine-Linkoping/modules into add_compression_to_bam2fq

2024-12-22 02:58:17 +00:00 · 2022-05-05 08:32:48 +02:00 · 2022-05-05 08:32:48 +02:00 · 888e122f05
commit 888e122f05
parent be8ce6de2a e757966566
17 changed files with 408 additions and 8 deletions
--- a/modules/bowtie2/align/main.nf
+++ b/modules/bowtie2/align/main.nf
@ -29,6 +29,8 @@ process BOWTIE2_ALIGN {
        def unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        """
        INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
+        [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed 's/.rev.1.bt2l//'`
+        [ -z "\$INDEX" ] && echo "BT2 index files not found" 1>&2 && exit 1
        bowtie2 \\
            -x \$INDEX \\
            -U $reads \\
@ -49,6 +51,8 @@ process BOWTIE2_ALIGN {
        def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        """
        INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
+        [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed 's/.rev.1.bt2l//'`
+        [ -z "\$INDEX" ] && echo "BT2 index files not found" 1>&2 && exit 1
        bowtie2 \\
            -x \$INDEX \\
            -1 ${reads[0]} \\
--- a/modules/happy/happy/main.nf
+++ b/modules/happy/happy/main.nf
@ -0,0 +1,42 @@
+// Run hap.py on a truth/query VCF pair against a reference FASTA and collect the *.csv and *.json reports.
+process HAPPY_HAPPY {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::hap.py=0.3.14" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hap.py:0.3.14--py27h5c5a3ab_0':
+        'quay.io/biocontainers/hap.py:0.3.14--py27h5c5a3ab_0' }"
+
+    input:
+    tuple val(meta), path(truth_vcf), path(query_vcf), path(bed)
+    tuple path(fasta), path(fasta_fai) // fasta_fai is only staged next to the FASTA; it is not passed on the command line
+
+    output:
+    tuple val(meta), path('*.csv'), path('*.json')  , emit: metrics
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '0.3.14' // WARN: hard-coded, not read from the tool; keep in sync with the conda/container versions above.
+
+    """
+    hap.py \\
+        $truth_vcf \\
+        $query_vcf \\
+        $args \\
+        --reference $fasta \\
+        --threads $task.cpus \\
+        -R $bed \\
+        -o $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hap.py: $VERSION
+    END_VERSIONS
+    """
+}
--- a/modules/happy/happy/meta.yml
+++ b/modules/happy/happy/meta.yml
@ -0,0 +1,67 @@
+name: "happy_happy"
+description: Hap.py is a tool to compare diploid genotypes at haplotype level. Rather than comparing VCF records row by row, hap.py will generate and match alternate sequences in a superlocus. A superlocus is a small region of the genome (sized between 1 and around 1000 bp) that contains one or more variants.
+keywords:
+  - happy
+  - benchmark
+  - haplotype
+tools:
+  - "happy":
+      description: "Haplotype VCF comparison tools"
+      homepage: "https://www.illumina.com/products/by-type/informatics-products/basespace-sequence-hub/apps/hap-py-benchmarking.html"
+      documentation: "https://github.com/Illumina/hap.py"
+      tool_dev_url: "https://github.com/Illumina/hap.py"
+      doi: ""
+      licence: ["BSD-2-clause"] # a YAML sequence of licence names, not a quoted string
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - truth_vcf:
+      type: file
+      description: gold standard VCF file
+      pattern: "*.{vcf,vcf.gz}"
+  - query_vcf:
+      type: file
+      description: VCF/GVCF file to query
+      pattern: "*.{vcf,vcf.gz}"
+  - bed:
+      type: file
+      description: BED file
+      pattern: "*.bed"
+  - fasta:
+      type: file
+      description: FASTA file of the reference genome
+      pattern: "*.{fa,fasta}"
+  - fasta_fai:
+      type: file
+      description: The index of the reference FASTA
+      pattern: "*.fai"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - summary:
+      type: file
+      description: A CSV file containing the summary of the benchmarking
+      pattern: "*.summary.csv"
+  - extended:
+      type: file
+      description: A CSV file containing extended info of the benchmarking
+      pattern: "*.extended.csv"
+  - runinfo:
+      type: file
+      description: A JSON file containing the run info
+      pattern: "*.runinfo.json"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@nvnieuwk"
--- a/modules/happy/prepy/main.nf
+++ b/modules/happy/prepy/main.nf
@ -0,0 +1,41 @@
+// Preprocess a VCF with pre.py against a reference FASTA and a BED file, writing ${prefix}.vcf.gz.
+process HAPPY_PREPY {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::hap.py=0.3.14" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hap.py:0.3.14--py27h5c5a3ab_0':
+        'quay.io/biocontainers/hap.py:0.3.14--py27h5c5a3ab_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(bed)
+    tuple path(fasta), path(fasta_fai) // fasta_fai is only staged next to the FASTA; it is not passed on the command line
+
+    output:
+    tuple val(meta), path('*.vcf.gz')  , emit: preprocessed_vcf
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '0.3.14' // WARN: hard-coded, not read from the tool; keep in sync with the conda/container versions above.
+
+    """
+    pre.py \\
+        $args \\
+        -R $bed \\
+        --reference $fasta \\
+        --threads $task.cpus \\
+        $vcf \\
+        ${prefix}.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pre.py: $VERSION
+    END_VERSIONS
+    """
+}
--- a/modules/happy/prepy/meta.yml
+++ b/modules/happy/prepy/meta.yml
@ -0,0 +1,55 @@
+name: "happy_prepy"
+description: Pre.py is a preprocessing tool made to preprocess VCF files for Hap.py
+keywords:
+  - happy
+  - benchmark
+  - haplotype
+tools:
+  - "happy":
+      description: "Haplotype VCF comparison tools"
+      homepage: "https://www.illumina.com/products/by-type/informatics-products/basespace-sequence-hub/apps/hap-py-benchmarking.html"
+      documentation: "https://github.com/Illumina/hap.py"
+      tool_dev_url: "https://github.com/Illumina/hap.py"
+      doi: ""
+      licence: ["BSD-2-clause"] # a YAML sequence of licence names, not a quoted string
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file
+      description: VCF file to preprocess
+      pattern: "*.{vcf,vcf.gz}"
+  - bed:
+      type: file
+      description: BED file
+      pattern: "*.bed"
+  - fasta:
+      type: file
+      description: FASTA file of the reference genome
+      pattern: "*.{fa,fasta}"
+  - fasta_fai:
+      type: file
+      description: The index of the reference FASTA
+      pattern: "*.fai"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file
+      description: A preprocessed VCF file
+      pattern: "*.vcf.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@nvnieuwk"
--- a/modules/vardictjava/main.nf
+++ b/modules/vardictjava/main.nf
@ -10,8 +10,7 @@ process VARDICTJAVA {
        'quay.io/biocontainers/vardict-java:1.8.3--hdfd78af_0' }"

    input:
-    tuple val(meta), path(bam), path(bai)
-    path(bed)
+    tuple val(meta), path(bam), path(bai), path(bed)
    tuple path(fasta), path(fasta_fai)

    output:
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -891,6 +891,14 @@ hamronization/summarize:
  - modules/hamronization/summarize/**
  - tests/modules/hamronization/summarize/**

+happy/happy:
+  - modules/happy/happy/**
+  - tests/modules/happy/happy/**
+
+happy/prepy:
+  - modules/happy/prepy/**
+  - tests/modules/happy/prepy/**
+
 hicap:
  - modules/hicap/**
  - tests/modules/hicap/**
--- a/tests/modules/bowtie2/align/main.nf
+++ b/tests/modules/bowtie2/align/main.nf
@ -32,4 +32,4 @@ workflow test_bowtie2_align_paired_end {

    BOWTIE2_BUILD ( fasta )
    BOWTIE2_ALIGN ( input, BOWTIE2_BUILD.out.index, save_unaligned )
-}
+}
--- a/tests/modules/bowtie2/align/nextflow.config
+++ b/tests/modules/bowtie2/align/nextflow.config
@ -1,5 +1,16 @@
+params {
+    force_large_index = false
+}
+
 process {

    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
-
+}
+
+if (params.force_large_index) {
+    process {
+        withName: BOWTIE2_BUILD {
+            ext.args = '--large-index'
+        }
+    }
 }
--- a/tests/modules/bowtie2/align/test.yml
+++ b/tests/modules/bowtie2/align/test.yml
@ -39,3 +39,45 @@
      md5sum: 52be6950579598a990570fbcf5372184
    - path: ./output/bowtie2/bowtie2/genome.rev.2.bt2
      md5sum: e3b4ef343dea4dd571642010a7d09597
+
+- name: bowtie2 align single-end large-index
+  command: nextflow run ./tests/modules/bowtie2/align -entry test_bowtie2_align_single_end -c ./tests/config/nextflow.config -c ./tests/modules/bowtie2/align/nextflow.config --force_large_index
+  tags:
+    - bowtie2
+    - bowtie2/align
+  files:
+    - path: ./output/bowtie2/test.bam
+    - path: ./output/bowtie2/test.bowtie2.log
+    - path: ./output/bowtie2/bowtie2/genome.3.bt2l
+      md5sum: 8952b3e0b1ce9a7a5916f2e147180853
+    - path: ./output/bowtie2/bowtie2/genome.2.bt2l
+      md5sum: 22c284084784a0720989595e0c9461fd
+    - path: ./output/bowtie2/bowtie2/genome.1.bt2l
+      md5sum: 07d811cd4e350d56267183d2ac7023a5
+    - path: ./output/bowtie2/bowtie2/genome.4.bt2l
+      md5sum: c25be5f8b0378abf7a58c8a880b87626
+    - path: ./output/bowtie2/bowtie2/genome.rev.1.bt2l
+      md5sum: fda48e35925fb24d1c0785f021981e25
+    - path: ./output/bowtie2/bowtie2/genome.rev.2.bt2l
+      md5sum: 802c26d32b970e1b105032b7ce7348b4
+
+- name: bowtie2 align paired-end large-index
+  command: nextflow run ./tests/modules/bowtie2/align -entry test_bowtie2_align_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bowtie2/align/nextflow.config --force_large_index
+  tags:
+    - bowtie2
+    - bowtie2/align
+  files:
+    - path: ./output/bowtie2/test.bam
+    - path: ./output/bowtie2/test.bowtie2.log
+    - path: ./output/bowtie2/bowtie2/genome.3.bt2l
+      md5sum: 8952b3e0b1ce9a7a5916f2e147180853
+    - path: ./output/bowtie2/bowtie2/genome.2.bt2l
+      md5sum: 22c284084784a0720989595e0c9461fd
+    - path: ./output/bowtie2/bowtie2/genome.1.bt2l
+      md5sum: 07d811cd4e350d56267183d2ac7023a5
+    - path: ./output/bowtie2/bowtie2/genome.4.bt2l
+      md5sum: c25be5f8b0378abf7a58c8a880b87626
+    - path: ./output/bowtie2/bowtie2/genome.rev.1.bt2l
+      md5sum: fda48e35925fb24d1c0785f021981e25
+    - path: ./output/bowtie2/bowtie2/genome.rev.2.bt2l
+      md5sum: 802c26d32b970e1b105032b7ce7348b4
--- a/tests/modules/happy/happy/main.nf
+++ b/tests/modules/happy/happy/main.nf
@ -0,0 +1,39 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HAPPY_HAPPY } from '../../../../modules/happy/happy/main.nf'
+
+workflow test_happy_vcf {
+    // Inputs follow the process signature: [ meta, truth VCF, query VCF, BED ] and [ fasta, fai ].
+    def hs = params.test_data['homo_sapiens']
+    def vcfs = [
+        [ id:'test' ], // meta map
+        file(hs['illumina']['test_rnaseq_vcf'], checkIfExists: true),
+        file(hs['illumina']['test_genome21_indels_vcf_gz'], checkIfExists: true),
+        file(hs['genome']['genome_bed'], checkIfExists: true)
+    ]
+    def reference = Channel.value([
+        file(hs['genome']['genome_fasta'], checkIfExists: true),
+        file(hs['genome']['genome_fasta_fai'], checkIfExists: true)
+    ])
+
+    HAPPY_HAPPY ( vcfs, reference )
+}
+
+workflow test_happy_gvcf {
+    // Same layout as the VCF test, but the query is the 'test_genome_vcf' file: [ meta, truth, query, BED ].
+    def hs = params.test_data['homo_sapiens']
+    def vcfs = [
+        [ id:'test' ], // meta map
+        file(hs['illumina']['test_rnaseq_vcf'], checkIfExists: true),
+        file(hs['illumina']['test_genome_vcf'], checkIfExists: true),
+        file(hs['genome']['genome_bed'], checkIfExists: true)
+    ]
+    def reference = Channel.value([
+        file(hs['genome']['genome_fasta'], checkIfExists: true),
+        file(hs['genome']['genome_fasta_fai'], checkIfExists: true)
+    ])
+
+    HAPPY_HAPPY ( vcfs, reference )
+}
--- a/tests/modules/happy/happy/nextflow.config
+++ b/tests/modules/happy/happy/nextflow.config
@ -0,0 +1,5 @@
+process {
+    // Publish into <outdir>/<name>: last ':'-separated part of the process name, up to its first '_', lower-cased.
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/happy/happy/test.yml
+++ b/tests/modules/happy/happy/test.yml
@ -0,0 +1,27 @@
+- name: happy happy test_happy_vcf
+  command: nextflow run tests/modules/happy/happy -entry test_happy_vcf -c tests/config/nextflow.config
+  tags:
+    - happy
+    - happy/happy
+  files:
+    - path: output/happy/test.extended.csv
+      md5sum: ef79c7c789ef4f146ca2e50dafaf22b3
+    - path: output/happy/test.runinfo.json
+    - path: output/happy/test.summary.csv
+      md5sum: f8aa5d36d3c48dede2f607fd565894ad
+    - path: output/happy/versions.yml
+      md5sum: 82243bf6dbdc71aa63211ee2a89f47f2
+
+- name: happy happy test_happy_gvcf
+  command: nextflow run tests/modules/happy/happy -entry test_happy_gvcf -c tests/config/nextflow.config
+  tags:
+    - happy
+    - happy/happy
+  files:
+    - path: output/happy/test.extended.csv
+      md5sum: 3d5c21b67a259a3f6dcb088d55b86cd3
+    - path: output/happy/test.runinfo.json
+    - path: output/happy/test.summary.csv
+      md5sum: 03044e9bb5a0c6f0947b7e910fc8a558
+    - path: output/happy/versions.yml
+      md5sum: 551fa216952d6f5de78e6e453b92aaab
--- a/tests/modules/happy/prepy/main.nf
+++ b/tests/modules/happy/prepy/main.nf
@ -0,0 +1,37 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HAPPY_PREPY } from '../../../../modules/happy/prepy/main.nf'
+
+workflow test_happy_prepy_vcf {
+    // Inputs follow the process signature: [ meta, VCF, BED ] and [ fasta, fai ].
+    def hs = params.test_data['homo_sapiens']
+    def vcf_and_bed = [
+        [ id:'test' ], // meta map
+        file(hs['illumina']['test_genome21_indels_vcf_gz'], checkIfExists: true),
+        file(hs['genome']['genome_bed'], checkIfExists: true)
+    ]
+    def reference = Channel.value([
+        file(hs['genome']['genome_fasta'], checkIfExists: true),
+        file(hs['genome']['genome_fasta_fai'], checkIfExists: true)
+    ])
+
+    HAPPY_PREPY ( vcf_and_bed, reference )
+}
+
+workflow test_happy_prepy_gvcf {
+    // Same layout as the VCF test, but the input is the 'test_genome_vcf' file: [ meta, VCF, BED ].
+    def hs = params.test_data['homo_sapiens']
+    def vcf_and_bed = [
+        [ id:'test' ], // meta map
+        file(hs['illumina']['test_genome_vcf'], checkIfExists: true),
+        file(hs['genome']['genome_bed'], checkIfExists: true)
+    ]
+    def reference = Channel.value([
+        file(hs['genome']['genome_fasta'], checkIfExists: true),
+        file(hs['genome']['genome_fasta_fai'], checkIfExists: true)
+    ])
+
+    HAPPY_PREPY ( vcf_and_bed, reference )
+}
--- a/tests/modules/happy/prepy/nextflow.config
+++ b/tests/modules/happy/prepy/nextflow.config
@ -0,0 +1,5 @@
+process {
+    // Publish into <outdir>/<name>: last ':'-separated part of the process name, up to its first '_', lower-cased.
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/happy/prepy/test.yml
+++ b/tests/modules/happy/prepy/test.yml
@ -0,0 +1,19 @@
+- name: happy prepy test_happy_prepy_vcf
+  command: nextflow run tests/modules/happy/prepy -entry test_happy_prepy_vcf -c tests/config/nextflow.config
+  tags:
+    - happy/prepy
+    - happy
+  files:
+    - path: output/happy/test.vcf.gz
+    - path: output/happy/versions.yml
+      md5sum: 814d20f1f29f23a3d21012748a5d6393
+
+- name: happy prepy test_happy_prepy_gvcf
+  command: nextflow run tests/modules/happy/prepy -entry test_happy_prepy_gvcf -c tests/config/nextflow.config
+  tags:
+    - happy/prepy
+    - happy
+  files:
+    - path: output/happy/test.vcf.gz
+    - path: output/happy/versions.yml
+      md5sum: 970a54de46e68ef6d5228a26eaa4c8e7
--- a/tests/modules/vardictjava/main.nf
+++ b/tests/modules/vardictjava/main.nf
@ -9,15 +9,14 @@ workflow test_vardictjava {
    bam_input_ch = Channel.value([
        [ id:'test' ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
    ])

-    bed = Channel.value(file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true))
-
    reference = Channel.value([
        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
    ])

-    VARDICTJAVA ( bam_input_ch, bed, reference )
+    VARDICTJAVA ( bam_input_ch, reference )
 }