Merge branch 'master' of https://github.com/nf-core/modules into mpileup_bgzip

2024-12-22 02:58:17 +00:00 · 2022-06-13 08:31:01 +02:00 · 2022-06-13 08:31:01 +02:00 · c3b6ec8f3b
commit c3b6ec8f3b
parent dc6c09e083 5affd97cf0
12 changed files with 292 additions and 0 deletions
--- a/modules/ampir/main.nf
+++ b/modules/ampir/main.nf
@ -0,0 +1,48 @@
+process AMPIR { // Runs the R package ampir on a protein FASTA and writes the filtered predictions as TSV and FASTA
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::r-ampir=1.1.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0':
+        'quay.io/biocontainers/r-ampir:1.1.0' }"
+
+    input:
+    tuple val(meta), path(faa) // meta map plus the amino acid FASTA that is read with read_faa()
+    val model                  // forwarded as `model = '...'` to predict_amps()
+    val min_length             // forwarded as `min_len`; an empty list ([]) leaves the argument out
+    val min_probability        // rows whose prob_AMP is below this value are dropped
+
+    output:
+    tuple val(meta), path("*.faa"), emit: amps_faa // input sequences selected by the filtered prediction rows
+    tuple val(meta), path("*.tsv"), emit: amps_tsv // filtered prediction table
+    path "versions.yml"           , emit: versions // written by the R script itself via writeLines()
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // NOTE(review): not referenced anywhere in the script below
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    min_length = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to AMPir default value if none specified
+    if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!" // the result is written to <prefix>.faa, so it must not share the input's name
+    """
+    #!/usr/bin/env Rscript
+    library(ampir)
+
+    input_seqs <- read_faa('${faa}')
+    prediction <- predict_amps(input_seqs,${min_length} model = '${model}')
+    prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ]
+    output_seqs <- input_seqs[row.names(prediction), ]
+    write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.')
+    df_to_faa(output_seqs, "${prefix}.faa")
+
+    version_file_path <- "versions.yml"
+    version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".")
+    f <- file(version_file_path, "w")
+    writeLines('"${task.process}":', f)
+    writeLines("    ampir: ", f, sep = "")
+    writeLines(version_ampir, f)
+    close(f)
+    """
+}
--- a/modules/ampir/meta.yml
+++ b/modules/ampir/meta.yml
@ -0,0 +1,59 @@
+name: "ampir"
+description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs.
+keywords:
+  - ampir
+  - amp
+  - antimicrobial peptide prediction
+tools:
+  - "ampir":
+      description: "A toolkit to predict antimicrobial peptides from protein sequences on a genome-wide scale."
+      homepage: "https://github.com/Legana/ampir"
+      documentation: "https://cran.r-project.org/web/packages/ampir/index.html"
+      tool_dev_url: "https://github.com/Legana/ampir"
+      doi: "10.1093/bioinformatics/btaa653"
+      licence: ["GPL v2"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - faa:
+      type: file
+      description: FASTA file containing amino acid sequences
+      pattern: "*.{faa,fasta}"
+  - model:
+      type: value
+      description: Built-in model for AMP prediction
+      pattern: "{precursor,mature}"
+  - min_length:
+      type: value
+      description: Minimum protein length for which predictions will be generated (pass an empty list to use the ampir default)
+      pattern: "[0-9]+"
+  - min_probability:
+      type: value
+      description: Minimum AMP probability (between 0 and 1, e.g. 0.7) a prediction must reach to be kept
+      pattern: "[01]([.][0-9]+)?"  # a decimal fraction such as 0.7, not a two-digit integer
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - amps_faa:
+      type: file
+      description: File containing AMP predictions in amino acid FASTA format
+      pattern: "*.faa"
+  - amps_tsv:
+      type: file
+      description: File containing AMP predictions in TSV format
+      pattern: "*.tsv"
+
+authors:
+  - "@jasmezz"
--- a/modules/haplocheck/main.nf
+++ b/modules/haplocheck/main.nf
@ -0,0 +1,44 @@
+process HAPLOCHECK { // Runs haplocheck on a VCF and emits its raw text report and its HTML report
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::haplocheck=1.3.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/haplocheck:1.3.3--h4a94de4_0':
+        'quay.io/biocontainers/haplocheck:1.3.3--h4a94de4_0' }"
+
+    input:
+    tuple val(meta), path(vcf) // meta map plus the VCF handed to haplocheck
+
+    output:
+    tuple val(meta), path("*.txt") , emit: txt  // raw report (<prefix>.raw.txt)
+    tuple val(meta), path("*.html"), emit: html // HTML report (<prefix>.html)
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''               // extra command-line options from the module configuration
+    def prefix = task.ext.prefix ?: "${meta.id}" // output name stem, defaults to the sample id
+    """
+    haplocheck $args --raw --out $prefix $vcf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        haplocheck: \$(echo \$(haplocheck --version 2>&1) | cut -f 2 -d " " )
+    END_VERSIONS
+    """
+
+    stub: // creates empty placeholder outputs with the same names as a real run
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.raw.txt
+    touch ${prefix}.html
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        haplocheck: \$(echo \$(haplocheck --version 2>&1) | cut -f 2 -d " " )
+    END_VERSIONS
+    """
+}
--- a/modules/haplocheck/meta.yml
+++ b/modules/haplocheck/meta.yml
@ -0,0 +1,55 @@
+name: "haplocheck"
+description: |
+  Haplocheck detects contamination patterns in mtDNA and WGS sequencing studies by analyzing
+  the mitochondrial DNA. Haplocheck also works as a proxy tool for nDNA studies and provides
+  users a graphical report to investigate the contamination further. Internally, it uses the
+  Haplogrep tool, that supports rCRS and RSRS mitochondrial versions.
+keywords:
+  - mitochondrial
+  - mtDNA
+  - contamination
+tools:
+  - "haplocheck":
+      description: "Detects in-sample contamination in mtDNA or WGS sequencing studies by analyzing the mitochondrial content."
+      homepage: "https://github.com/genepi/haplocheck"
+      documentation: "https://github.com/genepi/haplocheck"
+      tool_dev_url: "https://github.com/genepi/haplocheck"
+      doi: "10.1101/gr.256545.119"
+      licence: ["MIT"]  # a real list, not a quoted string that merely looks like one
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+
+  - vcf:
+      type: file
+      description: VCF file
+      pattern: "*.vcf.gz"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+  - txt:
+      type: file
+      description: Raw report in txt format
+      pattern: "*.txt"
+
+  - html:
+      type: file
+      description: Haplocheck HTML report
+      pattern: "*.html"
+
+authors:
+  - "@lmtani"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -26,6 +26,10 @@ allelecounter:
  - modules/allelecounter/**
  - tests/modules/allelecounter/**

+ampir:
+  - modules/ampir/**
+  - tests/modules/ampir/**
+
 amplify/predict:
  - modules/amplify/predict/**
  - tests/modules/amplify/predict/**
@ -963,6 +967,10 @@ hamronization/summarize:
  - modules/hamronization/summarize/**
  - tests/modules/hamronization/summarize/**

+haplocheck:
+  - modules/haplocheck/**
+  - tests/modules/haplocheck/**
+
 happy/happy:
  - modules/happy/happy/**
  - tests/modules/happy/happy/**
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@ -329,6 +329,8 @@ params {
                test_rnaseq_vcf                               =  "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf"
                test_sv_vcf                                   =  "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz"

+                test_mito_vcf                                 = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz"
+
                test_pytor                                    = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor"

                test_flowcell                                 = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz"
--- a/tests/modules/ampir/main.nf
+++ b/tests/modules/ampir/main.nf
@ -0,0 +1,20 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { AMPIR } from '../../../modules/ampir/main.nf'
+
+workflow test_ampir {
+
+    // Module input: [ meta map, protein FASTA from the shared test data ]
+    proteome = [
+        [ id:'test', single_end:false ],
+        file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true)
+    ]
+
+    amp_model       = "precursor"
+    length_cutoff   = []    // empty list: no explicit minimum length is passed on
+    probability_min = "0.7"
+
+    AMPIR ( proteome, amp_model, length_cutoff, probability_min )
+}
--- a/tests/modules/ampir/nextflow.config
+++ b/tests/modules/ampir/nextflow.config
@ -0,0 +1,5 @@
+process { // process-scope settings for the ampir module test
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } // <outdir>/<name>: last ':'-separated part of the process name, cut at its first '_' and lower-cased
+    
+}
--- a/tests/modules/ampir/test.yml
+++ b/tests/modules/ampir/test.yml
@ -0,0 +1,11 @@
+- name: ampir test_ampir
+  command: nextflow run ./tests/modules/ampir -entry test_ampir -c ./tests/config/nextflow.config  -c ./tests/modules/ampir/nextflow.config
+  tags:
+    - ampir
+  files:  # published outputs expected after the run
+    - path: output/ampir/test.tsv
+      contains: ["seq_name\tseq_aa\tprob_AMP", "WP_014895017.1"]  # header columns plus one sequence ID that must appear
+    - path: output/ampir/test.faa
+      md5sum: 0435609144022c55ac196db053f0df89
+    - path: output/ampir/versions.yml
+      md5sum: 4a11d25b8a904a7ffb34ae88f6826888
--- a/tests/modules/haplocheck/main.nf
+++ b/tests/modules/haplocheck/main.nf
@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HAPLOCHECK } from '../../../modules/haplocheck/main.nf'
+
+workflow test_haplocheck {
+
+    // Test VCF resolved from the shared test-data config; the file must exist
+    mito_vcf = file(params.test_data['homo_sapiens']['illumina']['test_mito_vcf'], checkIfExists: true)
+    sample   = [ id:'test' ]
+
+    // Module input: [ meta map, VCF ]
+    HAPLOCHECK ( [ sample, mito_vcf ] )
+}
--- a/tests/modules/haplocheck/nextflow.config
+++ b/tests/modules/haplocheck/nextflow.config
@ -0,0 +1,5 @@
+process { // process-scope settings for the haplocheck module test
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } // <outdir>/<name>: last ':'-separated part of the process name, cut at its first '_' and lower-cased
+
+}
--- a/tests/modules/haplocheck/test.yml
+++ b/tests/modules/haplocheck/test.yml
@ -0,0 +1,20 @@
+- name: haplocheck test_haplocheck
+  command: nextflow run ./tests/modules/haplocheck -entry test_haplocheck -c ./tests/config/nextflow.config -c ./tests/modules/haplocheck/nextflow.config
+  tags:
+    - haplocheck
+  files:  # published outputs of the real run, each pinned by checksum
+    - path: output/haplocheck/test.html
+      md5sum: 59d69052c86edff0301816956eaf4d5f
+    - path: output/haplocheck/test.raw.txt
+      md5sum: 69f4e5b28a59b97fc19eb8e8b650d9d5
+    - path: output/haplocheck/versions.yml
+      md5sum: 94e2fa3ceb3946487319f92cea08c942
+
+- name: haplocheck test_haplocheck using stubs
+  command: nextflow run ./tests/modules/haplocheck -entry test_haplocheck -c ./tests/config/nextflow.config -c ./tests/modules/haplocheck/nextflow.config -stub-run
+  tags:
+    - haplocheck
+  files:  # same output paths as the real run, listed without checksums
+    - path: output/haplocheck/test.html
+    - path: output/haplocheck/test.raw.txt
+    - path: output/haplocheck/versions.yml