mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Merge branch 'master' into firstbranch
This commit is contained in:
commit
3d9c8a6bb1
271 changed files with 4113 additions and 1038 deletions
64
.github/ISSUE_TEMPLATE/bug_report.md
vendored
64
.github/ISSUE_TEMPLATE/bug_report.md
vendored
|
@ -1,64 +0,0 @@
|
|||
---
|
||||
name: Bug report
|
||||
about: Report something that is broken or incorrect
|
||||
title: "[BUG]"
|
||||
---
|
||||
|
||||
<!--
|
||||
# nf-core/module bug report
|
||||
|
||||
Hi there!
|
||||
|
||||
Thanks for telling us about a problem with the modules.
|
||||
Please delete this text and anything that's not relevant from the template below:
|
||||
-->
|
||||
|
||||
## Check Documentation
|
||||
|
||||
I have checked the following places for your error:
|
||||
|
||||
- [ ] [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting)
|
||||
- [ ] [nf-core/module documentation](https://github.com/nf-core/modules/blob/master/README.md)
|
||||
|
||||
## Description of the bug
|
||||
|
||||
<!-- A clear and concise description of what the bug is. -->
|
||||
|
||||
## Steps to reproduce
|
||||
|
||||
Steps to reproduce the behaviour:
|
||||
|
||||
1. Command line: <!-- [e.g. `nextflow run ...`] -->
|
||||
2. See error: <!-- [Please provide your error message] -->
|
||||
|
||||
## Expected behaviour
|
||||
|
||||
<!-- A clear and concise description of what you expected to happen. -->
|
||||
|
||||
## Log files
|
||||
|
||||
Have you provided the following extra information/files:
|
||||
|
||||
- [ ] The command used to run the module
|
||||
- [ ] The `.nextflow.log` file <!-- this is a hidden file in the directory where you launched the module -->
|
||||
|
||||
## System
|
||||
|
||||
- Hardware: <!-- [e.g. HPC, Desktop, Cloud...] -->
|
||||
- Executor: <!-- [e.g. slurm, local, awsbatch...] -->
|
||||
- OS: <!-- [e.g. CentOS Linux, macOS, Linux Mint...] -->
|
||||
- Version <!-- [e.g. 7, 10.13.6, 18.3...] -->
|
||||
|
||||
## Nextflow Installation
|
||||
|
||||
- Version: <!-- [e.g. 19.10.0] -->
|
||||
|
||||
## Container engine
|
||||
|
||||
- Engine: <!-- [e.g. Conda, Docker, Singularity or Podman] -->
|
||||
- version: <!-- [e.g. 1.0.0] -->
|
||||
- Image tag: <!-- [e.g. nfcore/module:2.6] -->
|
||||
|
||||
## Additional context
|
||||
|
||||
<!-- Add any other context about the problem here. -->
|
52
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
52
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
|
@ -0,0 +1,52 @@
|
|||
# GitHub issue form used to report a bug in nf-core/modules.
name: Bug report
description: Report something that is broken or incorrect
labels: bug
body:
  # Both documentation checks must be ticked before the issue can be submitted.
  - type: checkboxes
    attributes:
      label: Have you checked the docs?
      description: I have checked the following places for my error
      options:
        - label: "[nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting)"
          required: true
        - label: "[nf-core modules documentation](https://nf-co.re/docs/contributing/modules)"
          required: true

  - type: textarea
    id: description
    attributes:
      label: Description of the bug
      description: A clear and concise description of what the bug is.
    validations:
      required: true

  # `render: console` formats the submitted text as a console code block.
  - type: textarea
    id: command_used
    attributes:
      label: Command used and terminal output
      description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal.
      render: console
      placeholder: |
        $ nextflow run ...

        Some output where something broke

  - type: textarea
    id: files
    attributes:
      label: Relevant files
      description: |
        Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed.
        Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files.

  - type: textarea
    id: system
    attributes:
      label: System information
      # Examples are written in italics rather than HTML comments so that they
      # stay visible once the description is rendered as Markdown.
      description: |
        * Nextflow version _(eg. 21.10.3)_
        * Hardware _(eg. HPC, Desktop, Cloud)_
        * Executor _(eg. slurm, local, awsbatch)_
        * Container engine and version: _(e.g. Docker 1.0.0, Singularity, Conda, Podman, Shifter or Charliecloud)_
        * OS and version: _(eg. CentOS Linux, macOS, Ubuntu 22.04)_
        * Image tag: _(eg. nfcore/cellranger:2.6)_
|
32
.github/ISSUE_TEMPLATE/feature_request.md
vendored
32
.github/ISSUE_TEMPLATE/feature_request.md
vendored
|
@ -1,32 +0,0 @@
|
|||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for nf-core/modules
|
||||
title: "[FEATURE]"
|
||||
---
|
||||
|
||||
<!--
|
||||
# nf-core/modules feature request
|
||||
|
||||
Hi there!
|
||||
|
||||
Thanks for suggesting a new feature for the modules!
|
||||
Please delete this text and anything that's not relevant from the template below:
|
||||
-->
|
||||
|
||||
## Is your feature request related to a problem? Please describe
|
||||
|
||||
<!-- A clear and concise description of what the problem is. -->
|
||||
|
||||
<!-- e.g. [I'm always frustrated when ...] -->
|
||||
|
||||
## Describe the solution you'd like
|
||||
|
||||
<!-- A clear and concise description of what you want to happen. -->
|
||||
|
||||
## Describe alternatives you've considered
|
||||
|
||||
<!-- A clear and concise description of any alternative solutions or features you've considered. -->
|
||||
|
||||
## Additional context
|
||||
|
||||
<!-- Add any other context about the feature request here. -->
|
32
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
32
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
# GitHub issue form used to suggest a new feature for nf-core/modules.
name: Feature request
description: Suggest an idea for nf-core/modules
labels: feature
title: "[FEATURE]"
body:
  # Only the problem statement is mandatory; the remaining fields are optional.
  - type: textarea
    id: description
    attributes:
      label: Is your feature request related to a problem? Please describe
      description: A clear and concise description of what the problem is.
      # Placeholders are shown as plain text, so no HTML comment markers here.
      placeholder: |
        e.g. I'm always frustrated when ...
    validations:
      required: true

  - type: textarea
    id: solution
    attributes:
      label: Describe the solution you'd like
      description: A clear and concise description of the solution you want to happen.

  - type: textarea
    id: alternatives
    attributes:
      label: Describe alternatives you've considered
      description: A clear and concise description of any alternative solutions or features you've considered.

  - type: textarea
    id: additional_context
    attributes:
      label: Additional context
      description: Add any other context about the feature request here.
|
26
.github/ISSUE_TEMPLATE/new_module.md
vendored
26
.github/ISSUE_TEMPLATE/new_module.md
vendored
|
@ -1,26 +0,0 @@
|
|||
---
|
||||
name: New module
|
||||
about: Suggest a new module for nf-core/modules
|
||||
title: "new module: TOOL/SUBTOOL"
|
||||
label: new module
|
||||
---
|
||||
|
||||
<!--
|
||||
# nf-core/modules new module suggestion
|
||||
|
||||
Hi there!
|
||||
|
||||
Thanks for suggesting a new module for the modules!
|
||||
Please delete this text and anything that's not relevant from the template below:
|
||||
|
||||
Replace TOOL with the bioconda name for the tool in the following text, so that the link is functional.
|
||||
|
||||
Replace TOOL/SUBTOOL in the issue title so that it's understandable.
|
||||
-->
|
||||
|
||||
I think it would be good to have a module for [TOOL](https://bioconda.github.io/recipes/TOOL/README.html)
|
||||
|
||||
- [ ] This module does not exist yet with the [`nf-core modules list`](https://github.com/nf-core/tools#list-modules) command
|
||||
- [ ] There is no [open pull request](https://github.com/nf-core/modules/pulls) for this module
|
||||
- [ ] There is no [open issue](https://github.com/nf-core/modules/issues) for this module
|
||||
- [ ] If I'm planning to work on this module, I added myself to the `Assignees` to facilitate tracking who is working on the module
|
36
.github/ISSUE_TEMPLATE/new_module.yml
vendored
Normal file
36
.github/ISSUE_TEMPLATE/new_module.yml
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
# GitHub issue form used to propose a new module for nf-core/modules.
name: "New module"
description: "Suggest a new module for nf-core/modules"
title: "new module: TOOL/SUBTOOL"
labels: "new module"
body:
  # The first three checks are mandatory; the last one is informational.
  - type: checkboxes
    attributes:
      label: "Is there an existing module for this?"
      description: >-
        This module does not exist yet with the
        [`nf-core modules list`](https://github.com/nf-core/tools#list-modules)
        command
      options:
        - label: "I have searched for the existing module"
          required: true

  - type: checkboxes
    attributes:
      label: "Is there an open PR for this?"
      description: >-
        There is no [open pull request](https://github.com/nf-core/modules/pulls)
        for this module
      options:
        - label: "I have searched for existing PRs"
          required: true

  - type: checkboxes
    attributes:
      label: "Is there an open issue for this?"
      description: >-
        There is no [open issue](https://github.com/nf-core/modules/issues)
        for this module
      options:
        - label: "I have searched for existing issues"
          required: true

  - type: checkboxes
    attributes:
      label: "Are you going to work on this?"
      description: >-
        If I'm planning to work on this module, I added myself to the
        `Assignees` to facilitate tracking who is working on the module
      options:
        - label: >-
            If I'm planning to work on this module, I added myself to the
            `Assignees` to facilitate tracking who is working on the module
          required: false
|
|
@ -32,8 +32,8 @@ input:
|
|||
description: loci file <CHR><tab><POS1>
|
||||
pattern: "*.{tsv}"
|
||||
- fasta:
|
||||
type: file
|
||||
description: Input genome fasta file. Required when passing CRAM files.
|
||||
type: file
|
||||
description: Input genome fasta file. Required when passing CRAM files.
|
||||
|
||||
output:
|
||||
- meta:
|
||||
|
|
41
modules/amplify/predict/main.nf
Normal file
41
modules/amplify/predict/main.nf
Normal file
|
@ -0,0 +1,41 @@
|
|||
// Runs AMPlify on an amino-acid FASTA and emits the resulting TSV, renamed to
// `<prefix>.tsv`, together with a versions.yml file.
process AMPLIFY_PREDICT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::amplify=1.0.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/amplify:1.0.3--py36hdfd78af_0':
        'quay.io/biocontainers/amplify:1.0.3--py36hdfd78af_0' }"

    input:
    tuple val(meta), path(faa)
    path(model_dir)

    output:
    tuple val(meta), path('*.tsv'), emit: tsv
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Only pass `-md` when a model directory was actually supplied.
    def custom_model_dir = model_dir ? "-md ${model_dir}" : ""
    // Version information not provided by tool; keep in sync with the
    // conda/container versions declared above.
    def VERSION = '1.0.3'
    """
    AMPlify \\
        $args \\
        ${custom_model_dir} \\
        -s '${faa}'

    #rename output, because tool includes date and time in name
    mv *.tsv ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        AMPlify: $VERSION
    END_VERSIONS
    """
}
|
47
modules/amplify/predict/meta.yml
Normal file
47
modules/amplify/predict/meta.yml
Normal file
|
@ -0,0 +1,47 @@
|
|||
# Module metadata for AMPLIFY_PREDICT.
name: "amplify_predict"
description: AMPlify is an attentive deep learning model for antimicrobial peptide prediction.
keywords:
  - antimicrobial peptides
  - AMPs
  - prediction
  - model
tools:
  - "amplify":
      description: "Attentive deep learning model for antimicrobial peptide prediction"
      homepage: "https://github.com/bcgsc/AMPlify"
      documentation: "https://github.com/bcgsc/AMPlify"
      tool_dev_url: "https://github.com/bcgsc/AMPlify"
      # Bare DOI identifier, without the resolver URL prefix.
      doi: "10.1186/s12864-022-08310-4"
      # A real YAML list, not a string that merely looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - faa:
      type: file
      description: amino acid sequences fasta
      pattern: "*.{fa,fa.gz,faa,faa.gz,fasta,fasta.gz}"
  - model_dir:
      type: directory
      description: Directory of where models are stored (optional)

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - tsv:
      type: file
      description: amino acid sequences with prediction (AMP, non-AMP) and probability scores
      pattern: "*.{tsv}"

authors:
  - "@louperelo"
|
56
modules/antismash/antismashlitedownloaddatabases/main.nf
Normal file
56
modules/antismash/antismashlitedownloaddatabases/main.nf
Normal file
|
@ -0,0 +1,56 @@
|
|||
// Downloads the antiSMASH databases into `antismash_db` and copies the
// antiSMASH installation directory to `antismash_dir`.
process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES {
    label 'process_low'

    conda (params.enable_conda ? "bioconda::antismash-lite=6.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/antismash-lite:6.0.1--pyhdfd78af_1' :
        'quay.io/biocontainers/antismash-lite:6.0.1--pyhdfd78af_1' }"

    /*
    These files are normally downloaded/created by download-antismash-databases itself, and must be retrieved for input by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines. This is solely for use for CI tests of the nf-core/module version of antiSMASH.
    Reason: Upon execution, the tool checks if certain database files are present within the container and if not, it tries to create them in /usr/local/bin, for which only root user has write permissions. Mounting those database files with this module prevents the tool from trying to create them.
    These files are also emitted as output channels in this module to enable the antismash-lite module to use them as mount volumes to the docker/singularity containers.
    */

    // Bind-mount the three staged input directories over the matching paths
    // inside the container; no extra options for any other engine.
    containerOptions {
        workflow.containerEngine == 'singularity' ?
        "-B $database_css:/usr/local/lib/python3.8/site-packages/antismash/outputs/html/css,$database_detection:/usr/local/lib/python3.8/site-packages/antismash/detection,$database_modules:/usr/local/lib/python3.8/site-packages/antismash/modules" :
        workflow.containerEngine == 'docker' ?
        "-v \$PWD/$database_css:/usr/local/lib/python3.8/site-packages/antismash/outputs/html/css -v \$PWD/$database_detection:/usr/local/lib/python3.8/site-packages/antismash/detection -v \$PWD/$database_modules:/usr/local/lib/python3.8/site-packages/antismash/modules" :
        ''
        }

    input:
    path database_css
    path database_detection
    path database_modules

    output:
    path("antismash_db") , emit: database
    path("antismash_dir"), emit: antismash_dir
    path "versions.yml"  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Declared with `def`, like `args`, so it is an explicit local variable
    // rather than an implicit assignment.
    def conda = params.enable_conda
    """
    download-antismash-databases \\
        --database-dir antismash_db \\
        $args

    if [[ $conda = false ]]; \
    then \
        cp -r /usr/local/lib/python3.8/site-packages/antismash antismash_dir; \
    else \
        cp -r \$(python -c 'import antismash;print(antismash.__file__.split("/__")[0])') antismash_dir; \
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        antismash-lite: \$(antismash --version | sed 's/antiSMASH //')
    END_VERSIONS
    """
}
|
60
modules/antismash/antismashlitedownloaddatabases/meta.yml
Normal file
60
modules/antismash/antismashlitedownloaddatabases/meta.yml
Normal file
|
@ -0,0 +1,60 @@
|
|||
# Module metadata for ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.
name: "antismash_antismashlitedownloaddatabases"
description: >-
  antiSMASH allows the rapid genome-wide identification, annotation and
  analysis of secondary metabolite biosynthesis gene clusters. This module
  downloads the antiSMASH databases.
keywords:
  - "secondary metabolites"
  - "BGC"
  - "biosynthetic gene cluster"
  - "genome mining"
  - "NRPS"
  - "RiPP"
  - "antibiotics"
  - "prokaryotes"
  - "bacteria"
  - "eukaryotes"
  - "fungi"
  - "antismash"
  - "database"
tools:
  - antismash:
      description: "antiSMASH - the antibiotics and Secondary Metabolite Analysis SHell"
      homepage: "https://docs.antismash.secondarymetabolites.org"
      documentation: "https://docs.antismash.secondarymetabolites.org"
      tool_dev_url: "https://github.com/antismash/antismash"
      doi: "10.1093/nar/gkab335"
      licence:
        - "AGPL v3"

# The three inputs are directories that get mounted into the container.
input:
  - database_css:
      type: directory
      description: >
        antismash/outputs/html/css folder which is being created during the
        antiSMASH database downloading step. These files are normally
        downloaded by download-antismash-databases itself, and must be
        retrieved by the user by manually running the command with conda or a
        standalone installation of antiSMASH. Therefore we do not recommend
        using this module for production pipelines, but rather require users
        to specify their own local copy of the antiSMASH database in
        pipelines.
      pattern: "css"
  - database_detection:
      type: directory
      description: >
        antismash/detection folder which is being created during the
        antiSMASH database downloading step. These files are normally
        downloaded by download-antismash-databases itself, and must be
        retrieved by the user by manually running the command with conda or a
        standalone installation of antiSMASH. Therefore we do not recommend
        using this module for production pipelines, but rather require users
        to specify their own local copy of the antiSMASH database in
        pipelines.
      pattern: "detection"
  - database_modules:
      type: directory
      description: >
        antismash/modules folder which is being created during the antiSMASH
        database downloading step. These files are normally downloaded by
        download-antismash-databases itself, and must be retrieved by the
        user by manually running the command with conda or a standalone
        installation of antiSMASH. Therefore we do not recommend using this
        module for production pipelines, but rather require users to specify
        their own local copy of the antiSMASH database in pipelines.
      pattern: "modules"

output:
  - versions:
      type: file
      description: "File containing software versions"
      pattern: "versions.yml"

  - database:
      type: directory
      description: "Download directory for antiSMASH databases"
      pattern: "antismash_db"
  - antismash_dir:
      type: directory
      description: >
        antismash installation folder which is being modified during the
        antiSMASH database downloading step. The modified files are normally
        downloaded by download-antismash-databases itself, and must be
        retrieved by the user by manually running the command with conda or a
        standalone installation of antiSMASH. Therefore we do not recommend
        using this module for production pipelines, but rather require users
        to specify their own local copy of the antiSMASH database and
        installation folder in pipelines.
      pattern: "antismash_dir"

authors:
  - "@jasmezz"
|
|
@ -2,10 +2,10 @@ process BAMTOOLS_SPLIT {
|
|||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::bamtools=2.5.1" : null)
|
||||
conda (params.enable_conda ? "bioconda::bamtools=2.5.2" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/bamtools:2.5.1--h9a82719_9' :
|
||||
'quay.io/biocontainers/bamtools:2.5.1--h9a82719_9' }"
|
||||
'https://depot.galaxyproject.org/singularity/bamtools:2.5.2--hd03093a_0' :
|
||||
'quay.io/biocontainers/bamtools:2.5.2--hd03093a_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(bam)
|
||||
|
@ -20,11 +20,15 @@ process BAMTOOLS_SPLIT {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input_list = bam.collect{"-in $it"}.join(' ')
|
||||
"""
|
||||
bamtools \\
|
||||
split \\
|
||||
-in $bam \\
|
||||
$args
|
||||
merge \\
|
||||
$input_list \\
|
||||
| bamtools \\
|
||||
split \\
|
||||
-stub $prefix \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -23,7 +23,7 @@ input:
|
|||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: A BAM file to split
|
||||
description: A list of one or more BAM files to merge and then split
|
||||
pattern: "*.bam"
|
||||
|
||||
output:
|
||||
|
@ -43,3 +43,4 @@ output:
|
|||
|
||||
authors:
|
||||
- "@sguizard"
|
||||
- "@matthdsm"
|
||||
|
|
2
modules/bclconvert/.gitignore
vendored
Normal file
2
modules/bclconvert/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
bcl-convert
|
||||
*.rpm
|
15
modules/bclconvert/Dockerfile
Normal file
15
modules/bclconvert/Dockerfile
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Dockerfile to create container with bcl-convert
# Push to nfcore/bclconvert:<VER>

FROM debian:bullseye-slim
LABEL authors="Matthias De Smet <matthias.desmet@ugent.be>" \
    description="Docker image containing bcl-convert"
# Disclaimer: this container is not provided nor supported by Illumina
# 'ps' command is needed by some nextflow executions to collect system stats
# Install procps only (no recommended extras) and clean apt cache
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    procps \
    && apt-get clean -y && rm -rf /var/lib/apt/lists/*
# The bcl-convert binary is copied from the build context
COPY bcl-convert /usr/local/bin/bcl-convert
RUN chmod +x /usr/local/bin/bcl-convert
|
30
modules/bclconvert/LICENSE
Normal file
30
modules/bclconvert/LICENSE
Normal file
|
@ -0,0 +1,30 @@
|
|||
ILLUMINA END-USER SOFTWARE LICENSE AGREEMENT
|
||||
|
||||
IMPORTANT-READ CAREFULLY. THIS IS A LICENSE AGREEMENT THAT YOU ARE REQUIRED TO ACCEPT BEFORE, DOWNLOADING, INSTALLING AND USING ANY SOFTWARE MADE AVAILABLE FROM THE ILLUMINA SUPPORT CENTER (https://support.illumina.com).
|
||||
|
||||
CAREFULLY READ ALL THE TERMS AND CONDITIONS OF THIS LICENSE AGREEMENT BEFORE PROCEEDING WITH DOWNLOADING, INSTALLING, AND/OR USING THE SOFTWARE. YOU ARE NOT PERMITTED TO DOWNLOAD, INSTALL, AND/OR USE THE SOFTWARE UNTIL YOU HAVE AGREED TO BE BOUND BY ALL OF THE TERMS AND CONDITIONS OF THIS LICENSE AGREEMENT. YOU REPRESENT AND WARRANT THAT YOU ARE DULY AUTHORIZED TO ACCEPT THE TERMS AND CONDITIONS OF THIS LICENSE AGREEMENT ON BEHALF OF YOUR EMPLOYER.
|
||||
|
||||
Software made available through the Illumina Support Center is licensed, not sold, to you. Your license to each software program made available through the Illumina Support Center is subject to your prior acceptance of either this Illumina End-User Software License Agreement (“Agreement”), or a custom end user license agreement (“Custom EULA”), if one is provided with the software. Any software that is subject to this Agreement is referred to herein as the “Software.” By accepting this Agreement, you agree the terms and conditions of this Agreement will apply to and govern any and all of your downloads, installations, and uses of each Illumina software program made available through the Illumina Support Center, except that your download, installation, and use of any software provided with a Custom EULA will be governed by the terms and conditions of the Custom EULA.
|
||||
|
||||
This Agreement is made and entered into by and between Illumina, Inc., a Delaware corporation, having offices at 5200 Illumina Way, San Diego, CA 92122 (“Illumina”) and you as the end-user of the Software (hereinafter, “Licensee” or “you”). All software, firmware, and associated media, printed materials, and online and electronic documentation, including any updates or upgrades thereof, made available through the Illumina Support Center (collectively, “Software”) provided to Licensee are for use solely by Licensee and the provisions herein WILL apply with respect to such Software.
|
||||
|
||||
License Grant. Subject to the terms and conditions of this Agreement, Illumina grants to Licensee, under the following terms and conditions, a personal, non-exclusive, revocable, non-transferable, non-sublicensable license, for its internal end-use purposes only, in the ordinary course of Licensee’s business to use the Software in executable object code form only, solely at the Licensee’s facility to, install and use the Software on a single computer accessible only by Licensee (and not on any public network or server), where the single computer is owned, leased, or otherwise substantially controlled by Licensee, for the purpose of processing and analyzing data generated from an Illumina genetic sequencing instrument owned and operated solely by Licensee (the “Product”). In the case of Software provided by Illumina in non-compiled form, Illumina grants Licensee a personal, non-exclusive, non-sublicenseable, restricted right to compile, install, and use one copy of the Software solely for processing and analyzing data generated from the Product.
|
||||
License Restrictions. Except as expressly permitted in Section 1, Licensee may not make, have made, import, use, copy, reproduce, distribute, display, publish, sell, re-sell, lease, or sub-license the Software, in whole or in part, except as expressly provided for in this Agreement. Licensee may not modify, improve, translate, reverse engineer, decompile, disassemble, or create derivative works of the Software or otherwise attempt to (a) defeat, avoid, by-pass, remove, deactivate, or otherwise circumvent any software protection mechanisms in the Software including, without limitation, any such mechanism used to restrict or control the functionality of the Software, or (b) derive the source code or the underlying ideas, algorithms, structure, or organization form of the Software. Licensee will not allow, at any time, including during and after the term of the license, the Software or any portions or copies thereof in any form to become available to any third parties. Licensee may use the Software solely with genomic data that is generated using the Product; Licensee may not use the Software with any data generated from other products or instruments. Licensee may not use the Software to perform any data analysis services for any third party.
|
||||
Ownership. The Software is protected by United States and international intellectual property laws. All right, title, and interest in and to the Software (including associated intellectual property rights) are and will remain vested in Illumina or Illumina’s affiliated companies or licensors. Licensee acknowledges that no rights, license or interest to any Illumina trademarks are granted hereunder. Licensee acknowledges that unauthorized reproduction or distribution of the Software, or any portion of it, may result in severe civil and criminal penalties. Illumina reserves all rights in and to the Software not expressly granted to Licensee under this Agreement.
|
||||
Upgrades/Updates. Illumina may, at its sole discretion, provide updates or upgrades to the Software. In that case, Licensee WILL have the same rights and obligations under such updates or upgrades as it has for the versions of the Software initially provided to Licensee hereunder. Licensee recognizes that Illumina is not obligated to provide any upgrades or updates to, or support for, the Software.
|
||||
Data Integrity/Loss. Licensee is responsible for the integrity and availability, including preventing the loss of data that Licensee generates, uses, analyzes, manages, or stores in connection with or through its use of the Software, including without limitation, investigating and implementing industry appropriate policies and procedures regarding the provision of access to Licensee’s data, monitoring access and use of Licensee’s data, conducting routine backups and archiving of Licensee’s data, and ensuring the adequacy of anti-virus software. Accordingly, Licensee agrees that Illumina is not responsible for any inability to access, loss or corruption of data as a result of Licensee’s use of the Software, and Illumina has no liability to Licensee in connection with such inability to access, loss or corruption of data.
|
||||
Term of License. This Agreement will be in effect from the time Licensee expressly accepts the terms and conditions of this license, or otherwise installs the Software, thereby accepting the terms and conditions contained herein, and will remain in effect until terminated. This license will otherwise terminate upon the conditions set forth in this Agreement, if revoked by Illumina, or if Licensee fails to comply with any term or condition of this Agreement including failure to pay any applicable license fee. Licensee agrees upon termination of this Agreement for any reason to immediately discontinue use of and un-install the Software and destroy all copies of the Software in its possession and/or under its control, and return or destroy, at Illumina’s option, any compact disks, floppy disks or other media provided by Illumina storing the Software thereon (together with any authorized copies thereof), as well as any documentation associated therewith
|
||||
Limited Warranty. Illumina warrants that, for a period of 6 months from the date of download or installation of the Software by Licensee, the Software will perform in all material respects in accordance with the accompanying documentation available on the Illumina Support Center. EXCEPT AND TO THE EXTENT EXPRESSLY PROVIDED IN THE FOREGOING, AND TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, THE SOFTWARE IS PROVIDED “AS IS” AND ILLUMINA EXPRESSLY DISCLAIMS ALL WARRANTIES AND CONDITIONS REGARDING THE SOFTWARE AND RESULTS GENERATED BY THE SOFTWARE, INCLUDING WITHOUT LIMITATION, TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, ALL OTHER EXPRESS OR IMPLIED WARRANTIES OR CONDITIONS OF MERCHANTABLE QUALITY, NON-INFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE, AND THOSE ARISING BY STATUTE OR OTHERWISE IN LAW OR FROM A COURSE OF DEALING OR USAGE OF TRADE. ILLUMINA DOES NOT WARRANT THAT THE FUNCTIONS CONTAINED IN THE SOFTWARE WILL MEET LICENSEE"S REQUIREMENTS, OR THAT THE OPERATION OF THE SOFTWARE WILL BE ERROR FREE OR UNINTERRUPTED.
|
||||
Limitation of Liability.
|
||||
(a) ILLUMINA’S ENTIRE LIABILITY AND LICENSEE"S EXCLUSIVE REMEDY UNDER THE LIMITED WARRANTY PROVISION OF SECTION 7 ABOVE WILL BE, AT ILLUMINA’S OPTION, EITHER (i) RETURN OF THE PRICE PAID FOR THE SOFTWARE, OR (ii) REPAIR OR REPLACEMENT OF THE PORTIONS OF THE SOFTWARE THAT DO NOT COMPLY WITH ILLUMINA’S LIMITED WARRANTY. THIS LIMITED WARRANTY IS VOID AND ILLUMINA WILL HAVE NO LIABILITY AT ALL IF FAILURE OF THE SOFTWARE TO COMPLY WITH ILLUMINA LIMITED WARRANTY HAS RESULTED FROM: (w) FAILURE TO USE THE SOFTWARE IN ACCORDANCE WITH ILLUMINA’S THEN CURRENT USER MANUAL OR THIS AGREEMENT; (x) ACCIDENT, ABUSE, OR MISAPPLICATION; (y) PRODUCTS OR EQUIPMENT NOT SPECIFIED BY ILLUMINA AS BEING COMPATIBLE WITH THE SOFTWARE; OR (z) IF LICENSEE HAS NOT NOTIFIED ILLUMINA IN WRITING OF THE DEFECT WITHIN THE ABOVE WARRANTY PERIOD.
|
||||
|
||||
(b) TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL ILLUMINA BE LIABLE UNDER ANY THEORY OF CONTRACT, TORT, STRICT LIABILITY OR OTHER LEGAL OR EQUITABLE THEORY FOR ANY PERSONAL INJURY OR ANY INDIRECT, CONSEQUENTIAL, OR INCIDENTAL DAMAGES, EVEN IF ILLUMINA HAS BEEN ADVISED OF THE POSSIBILITY THEREOF INCLUDING, WITHOUT LIMITATION, LOST PROFITS, LOST DATA, INTERRUPTION OF BUSINESS, LOST BUSINESS REVENUE, OTHER ECONOMIC LOSS, OR ANY LOSS OF RECORDED DATA ARISING OUT OF THE USE OF OR INABILITY TO USE THE SOFTWARE. EXCEPT AND TO THE EXTENT EXPRESSLY PROVIDED IN SECTION 7 AND 8(a) ABOVE OR AS OTHERWISE PERMITTED BY LAW, IN NO EVENT WILL ILLUMINA’S TOTAL LIABILITY TO LICENSEE FOR ALL DAMAGES (OTHER THAN AS MAY BE REQUIRED BY APPLICABLE LAW IN CASES INVOLVING PERSONAL INJURY) EXCEED THE AMOUNT OF $500 USD. THE FOREGOING LIMITATIONS WILL APPLY EVEN IF THE ABOVE STATED REMEDY FAILS OF ITS ESSENTIAL PURPOSE.
|
||||
|
||||
Survival. The limitations of liability and ownership rights of Illumina contained herein and Licensee’s obligations following termination of this Agreement WILL survive the termination of this Agreement for any reason.
|
||||
Research Use Only. The Software is labeled with a For Research Use Only or similar labeling statement and the performance characteristics of the Software have not been established and the Software is not for use in diagnostic procedures. Licensee acknowledges and agrees that (i) the Software has not been approved, cleared, or licensed by the United States Food and Drug Administration or any other regulatory entity whether foreign or domestic for any specific intended use, whether research, commercial, diagnostic, or otherwise, and (ii) Licensee must ensure it has any regulatory approvals that are necessary for Licensee’s intended uses of the Software. Licensee will comply with all applicable laws and regulations when using and maintaining the Software.
|
||||
General. Licensee may not sublicense, assign, share, pledge, rent or transfer any of its rights under this Agreement in relation to the Software or any portion thereof including documentation. Illumina reserves the right to change this Agreement at any time. When Illumina makes any changes, Illumina will provide the updated Agreement, or a link to it, on Illumina’s website (www.illumina.com) and such updated Agreement WILL become effective immediately. Licensee’s continued access to or use of the Software represents Licensee’s agreement to any revised Agreement. If one or more provisions of this Agreement are found to be invalid or unenforceable, this Agreement WILL not be rendered inoperative but the remaining provisions WILL continue in full force and effect. This Agreement constitutes the entire agreement between the parties with respect to the subject matter of this Agreement and merges all prior communications except that a “hard-copy” form of licensing agreement relating to the Software previously agreed to in writing by Illumina and Licensee WILL supersede and govern in the event of any conflicting provisions.
|
||||
Governing Law. This Agreement WILL be governed by and construed in accordance with the laws of the state of California, USA, without regard to its conflicts of laws principles, and independent of where a suit or action hereunder may be filed.
|
||||
U.S. Government End Users. If Licensee is a branch agency or instrumentality of the United States Government, the following provision applies. The Software is a “commercial item” as that term is defined at 48 C.F.R. 2.101, consisting of “commercial computer software” and “commercial computer software documentation,” as such terms are used in 48 C.F.R. 12.212 or 48 C.F.R. 227.7202 (as applicable). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4, all United States Government end users acquire the Software with only those rights set forth herein.
|
||||
Contact. Any questions regarding legal rights, duties, obligations, or restrictions associated with the software hereunder should be directed to Illumina, Inc., 5200 Illumina Way, San Diego, CA 92122, Attention: Legal Department, Phone: (858) 202-4500, Fax: (858) 202-4599, web site: www.illumina.com <http://www.illumina.com>.
|
||||
Third Party Components. The Software may include third party software (“Third Party Programs”). Some of the Third Party Programs are available under open source or free software licenses. The License Agreement accompanying the Licensed Software does not alter any rights or obligations Licensee may have under those open source or free software licenses. The licenses that govern the terms and conditions of use of the Third Party Programs included in the Licensed Software are provided in the READ ME provided with the Software. The READ ME also contains copyright statements for the various open source software components (or portions thereof) that are distributed with the Licensed Software.
|
||||
END OF END-USER SOFTWARE LICENSE AGREEMENT.
|
17
modules/bclconvert/README.md
Normal file
17
modules/bclconvert/README.md
Normal file
|
@ -0,0 +1,17 @@
|
|||
# Updating the docker container and making a new module release
|
||||
|
||||
bcl-convert is a commercial tool from Illumina. The container provided for the bcl-convert nf-core module is neither provided nor supported by Illumina. Updating the bcl-convert version in the container and pushing the update to Dockerhub must be done manually.
|
||||
|
||||
1. Navigate to the [BCL Convert download page](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/downloads.html) and download the rpm of the desired bcl-convert version with `curl` or `wget`.
|
||||
2. Unpack the RPM package using `rpm2cpio bcl-convert-*.rpm | cpio -i --make-directories`. Place the executable located in `<unpack_dir>/usr/bin/bcl-convert` in the same folder where the Dockerfile lies.
|
||||
3. Create and test the container:
|
||||
|
||||
```bash
|
||||
docker build . -t nfcore/bclconvert:<VERSION>
|
||||
```
|
||||
|
||||
4. Access rights are needed to push the container to the Dockerhub nfcore organization. Please ask a core team member to push it for you:
|
||||
|
||||
```bash
|
||||
docker push nfcore/bclconvert:<VERSION>
|
||||
```
|
81
modules/bclconvert/main.nf
Normal file
81
modules/bclconvert/main.nf
Normal file
|
@ -0,0 +1,81 @@
|
|||
// Demultiplexes an Illumina run directory into FASTQ files with bcl-convert.
//
// Inputs:
//   samplesheet - sample sheet passed to `--sample-sheet`
//   run_dir     - run directory passed to `--bcl-input-directory`; its
//                 InterOp/*.bin files are copied into the task's InterOp/ directory
// Outputs: FASTQ files, Reports/, Logs/, InterOp/ and versions.yml.
process BCLCONVERT {
    // Double quotes are required: a single-quoted Groovy string is not
    // interpolated, which would tag every task with the literal text "$samplesheet".
    tag "$samplesheet"
    label 'process_high'

    // The module declares a container only, so conda runs are rejected up front.
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using bcl-convert. Please use docker or singularity containers."
    }
    container "nfcore/bclconvert:3.9.3"

    input:
    path samplesheet
    path run_dir

    output:
    path "*.fastq.gz"               ,emit: fastq
    path "Reports/*.{csv,xml,bin}"  ,emit: reports
    path "Logs/*.{log,txt}"         ,emit: logs
    path "InterOp/*.bin"            ,emit: interop
    path "versions.yml"             ,emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    // After the conversion, the run's InterOp/*.bin files are copied into a fresh
    // InterOp/ directory and any Reports/*.bin files are moved alongside them.
    """
    bcl-convert \\
        $args \\
        --output-directory . \\
        --bcl-input-directory ${run_dir} \\
        --sample-sheet ${samplesheet} \\
        --bcl-num-parallel-tiles ${task.cpus}

    mkdir InterOp
    cp ${run_dir}/InterOp/*.bin InterOp/
    mv Reports/*.bin InterOp/

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //')
    END_VERSIONS
    """

    stub:
    // Creates placeholder files for every declared output so the module can be
    // dry-run without real BCL data.
    """
    echo "sample1_S1_L001_R1_001" > sample1_S1_L001_R1_001.fastq.gz
    echo "sample1_S1_L001_R2_001" > sample1_S1_L001_R2_001.fastq.gz
    echo "sample1_S1_L002_R1_001" > sample1_S1_L002_R1_001.fastq.gz
    echo "sample1_S1_L002_R2_001" > sample1_S1_L002_R2_001.fastq.gz
    echo "sample2_S2_L001_R1_001" > sample2_S2_L001_R1_001.fastq.gz
    echo "sample2_S2_L001_R2_001" > sample2_S2_L001_R2_001.fastq.gz
    echo "sample2_S2_L002_R1_001" > sample2_S2_L002_R1_001.fastq.gz
    echo "sample2_S2_L002_R2_001" > sample2_S2_L002_R2_001.fastq.gz

    mkdir Reports
    echo "Adapter_Metrics" > Reports/Adapter_Metrics.csv
    echo "Demultiplex_Stats" > Reports/Demultiplex_Stats.csv
    echo "fastq_list" > Reports/fastq_list.csv
    echo "Index_Hopping_Counts" > Reports/Index_Hopping_Counts.csv
    echo "IndexMetricsOut" > Reports/IndexMetricsOut.bin
    echo "Quality_Metrics" > Reports/Quality_Metrics.csv
    echo "RunInfo" > Reports/RunInfo.xml
    echo "SampleSheet" > Reports/SampleSheet.csv
    echo "Top_Unknown_Barcodes" > Reports/Top_Unknown_Barcodes.csv

    mkdir Logs
    echo "Errors" > Logs/Errors.log
    echo "FastqComplete" > Logs/FastqComplete.txt
    echo "Info" > Logs/Info.log
    echo "Warnings" > Logs/Warnings.log

    mkdir InterOp/
    echo "InterOp" > InterOp/InterOp.bin

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //')
    END_VERSIONS
    """
}
|
45
modules/bclconvert/meta.yml
Normal file
45
modules/bclconvert/meta.yml
Normal file
|
@ -0,0 +1,45 @@
|
|||
# Module metadata for modules/bclconvert/main.nf.
# Entries under `input` and `output` mirror the channels declared by the process.
name: "bclconvert"
description: Demultiplex Illumina BCL files
keywords:
  - demultiplex
  - illumina
  - fastq
tools:
  - "bclconvert":
      description: "Demultiplex Illumina BCL files"
      homepage: "https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html"
      documentation: "https://support-docs.illumina.com/SW/BCL_Convert/Content/SW/FrontPages/BCL_Convert.htm"
      licence: "ILLUMINA"

input:
  - samplesheet:
      type: file
      description: "Input samplesheet"
      pattern: "*.{csv}"
  - run_dir:
      type: directory
      description: "Input run directory containing RunInfo.xml and BCL data"

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fastq:
      type: file
      description: Demultiplexed FASTQ files
      pattern: "*.{fastq.gz}"
  - reports:
      type: file
      description: Demultiplexing Reports
      pattern: "Reports/*.{csv,xml}"
  - logs:
      type: file
      description: Log files
      pattern: "Logs/*.{log,txt}"
  - interop:
      type: file
      description: Interop files
      # Directory name matches the `InterOp/*.bin` glob emitted by main.nf.
      pattern: "InterOp/*.{bin}"
authors:
  - "@matthdsm"
|
|
@ -23,14 +23,12 @@ process BWA_MEM {
|
|||
def args = task.ext.args ?: ''
|
||||
def args2 = task.ext.args2 ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
|
||||
def samtools_command = sort_bam ? 'sort' : 'view'
|
||||
"""
|
||||
INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
|
||||
|
||||
bwa mem \\
|
||||
$args \\
|
||||
$read_group \\
|
||||
-t $task.cpus \\
|
||||
\$INDEX \\
|
||||
$reads \\
|
||||
|
|
|
@ -23,7 +23,6 @@ process BWAMEM2_MEM {
|
|||
def args = task.ext.args ?: ''
|
||||
def args2 = task.ext.args2 ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
|
||||
def samtools_command = sort_bam ? 'sort' : 'view'
|
||||
"""
|
||||
INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
|
||||
|
@ -31,7 +30,6 @@ process BWAMEM2_MEM {
|
|||
bwa-mem2 \\
|
||||
mem \\
|
||||
$args \\
|
||||
$read_group \\
|
||||
-t $task.cpus \\
|
||||
\$INDEX \\
|
||||
$reads \\
|
||||
|
|
|
@ -4,8 +4,8 @@ process CAT_FASTQ {
|
|||
|
||||
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
|
||||
'biocontainers/biocontainers:v1.2.0_cv1' }"
|
||||
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
|
||||
'ubuntu:20.04' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(reads, stageAs: "input*/*")
|
||||
|
|
33
modules/centrifuge/kreport/main.nf
Normal file
33
modules/centrifuge/kreport/main.nf
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Converts centrifuge classification results into a Kraken-style report.
//
// Inputs:
//   meta, results - sample map and the centrifuge results file to summarise
//   db            - directory searched for the centrifuge index (`*.1.cf`)
// Outputs: `${prefix}.txt` (kreport) and versions.yml.
process CENTRIFUGE_KREPORT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6':
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(results)
    path db

    output:
    tuple val(meta), path('*.txt') , emit: kreport
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The index base name is the `*.1.cf` path with that suffix removed. The sed
    // pattern escapes the dots and is anchored to the end so only the suffix is
    // stripped. `$args` forwards user-supplied options from task.ext.args.
    // NOTE(review): the report is written as `${prefix}.txt` next to the staged
    // input; confirm callers never stage a results file with that same name.
    """
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1\\.cf\$//'`
    centrifuge-kreport \\
        $args \\
        -x \$db_name \\
        ${results} \\
        > ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
41
modules/centrifuge/kreport/meta.yml
Normal file
41
modules/centrifuge/kreport/meta.yml
Normal file
|
@ -0,0 +1,41 @@
|
|||
# Module metadata for modules/centrifuge/kreport/main.nf.
# Entries under `input` and `output` mirror the channels declared by the process.
name: "centrifuge_kreport"
description: Creates Kraken-style reports from centrifuge out files
keywords:
  - metagenomics
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - results:
      type: file
      description: File containing the centrifuge classification results
      pattern: "*.{txt}"
  # `db` is declared as `path db` in main.nf and searched for `*.1.cf` index files.
  - db:
      type: directory
      description: Directory containing the centrifuge index files (`*.1.cf`)

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - kreport:
      type: file
      description: |
        File containing kraken-style report from centrifuge
        out files.
      pattern: "*.{txt}"
authors:
  - "@sofstam"
  - "@jfy133"
|
|
@ -2,10 +2,10 @@ process CUSTOM_GETCHROMSIZES {
|
|||
tag "$fasta"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::samtools=1.15" : null)
|
||||
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' :
|
||||
'quay.io/biocontainers/samtools:1.15--h1170115_1' }"
|
||||
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
|
||||
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
|
||||
|
||||
input:
|
||||
path fasta
|
||||
|
|
|
@ -2,20 +2,26 @@ process DIAMOND_BLASTP {
|
|||
tag "$meta.id"
|
||||
label 'process_medium'
|
||||
|
||||
// Dimaond is limited to v2.0.9 because there is not a
|
||||
// singularity version higher than this at the current time.
|
||||
conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null)
|
||||
conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
|
||||
'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
|
||||
'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
|
||||
'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(fasta)
|
||||
path db
|
||||
path db
|
||||
val out_ext
|
||||
val blast_columns
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.txt'), emit: txt
|
||||
path "versions.yml" , emit: versions
|
||||
tuple val(meta), path('*.blast'), optional: true, emit: blast
|
||||
tuple val(meta), path('*.xml') , optional: true, emit: xml
|
||||
tuple val(meta), path('*.txt') , optional: true, emit: txt
|
||||
tuple val(meta), path('*.daa') , optional: true, emit: daa
|
||||
tuple val(meta), path('*.sam') , optional: true, emit: sam
|
||||
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
|
||||
tuple val(meta), path('*.paf') , optional: true, emit: paf
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -23,6 +29,21 @@ process DIAMOND_BLASTP {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def columns = blast_columns ? "${blast_columns}" : ''
|
||||
// Map the requested output extension onto the numeric code passed to --outfmt.
switch ( out_ext ) {
    case "blast": outfmt = 0; break
    case "xml": outfmt = 5; break
    case "txt": outfmt = 6; break
    case "daa": outfmt = 100; break
    case "sam": outfmt = 101; break
    case "tsv": outfmt = 102; break
    case "paf": outfmt = 103; break
    default:
        // Warn BEFORE overwriting out_ext so the message names the rejected
        // value rather than the 'txt' fallback.
        log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
        outfmt = '6';
        out_ext = 'txt';
        break
}
|
||||
"""
|
||||
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
|
||||
|
||||
|
@ -31,8 +52,9 @@ process DIAMOND_BLASTP {
|
|||
--threads $task.cpus \\
|
||||
--db \$DB \\
|
||||
--query $fasta \\
|
||||
--outfmt ${outfmt} ${columns} \\
|
||||
$args \\
|
||||
--out ${prefix}.txt
|
||||
--out ${prefix}.${out_ext}
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -28,12 +28,50 @@ input:
|
|||
type: directory
|
||||
description: Directory containing the protein blast database
|
||||
pattern: "*"
|
||||
- out_ext:
|
||||
type: string
|
||||
description: |
|
||||
Specify the type of output file to be generated. `blast` corresponds to
|
||||
BLAST pairwise format. `xml` corresponds to BLAST xml format.
|
||||
`txt` corresponds to BLAST tabular format. `tsv` corresponds to
|
||||
taxonomic classification format.
|
||||
pattern: "blast|xml|txt|daa|sam|tsv|paf"
|
||||
- blast_columns:
|
||||
type: string
|
||||
description: |
|
||||
Optional space separated list of DIAMOND tabular BLAST output keywords
|
||||
used in conjunction with the 'txt' out_ext option (--outfmt 6). See
|
||||
DIAMOND documentation for more information.
|
||||
|
||||
output:
|
||||
- txt:
|
||||
- blast:
|
||||
type: file
|
||||
description: File containing blastp hits
|
||||
pattern: "*.{blastp.txt}"
|
||||
pattern: "*.{blast}"
|
||||
- xml:
|
||||
type: file
|
||||
description: File containing blastp hits
|
||||
pattern: "*.{xml}"
|
||||
- txt:
|
||||
type: file
|
||||
description: File containing hits in tabular BLAST format.
|
||||
pattern: "*.{txt}"
|
||||
- daa:
|
||||
type: file
|
||||
description: File containing hits in DAA format
|
||||
pattern: "*.{daa}"
|
||||
- sam:
|
||||
type: file
|
||||
description: File containing aligned reads in SAM format
|
||||
pattern: "*.{sam}"
|
||||
- tsv:
|
||||
type: file
|
||||
description: Tab separated file containing taxonomic classification of hits
|
||||
pattern: "*.{tsv}"
|
||||
- paf:
|
||||
type: file
|
||||
description: File containing aligned reads in pairwise mapping format
|
||||
pattern: "*.{paf}"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
|
@ -41,3 +79,4 @@ output:
|
|||
|
||||
authors:
|
||||
- "@spficklin"
|
||||
- "@jfy133"
|
||||
|
|
|
@ -2,20 +2,26 @@ process DIAMOND_BLASTX {
|
|||
tag "$meta.id"
|
||||
label 'process_medium'
|
||||
|
||||
// Dimaond is limited to v2.0.9 because there is not a
|
||||
// singularity version higher than this at the current time.
|
||||
conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null)
|
||||
conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
|
||||
'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
|
||||
'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
|
||||
'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(fasta)
|
||||
path db
|
||||
path db
|
||||
val out_ext
|
||||
val blast_columns
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.txt'), emit: txt
|
||||
path "versions.yml" , emit: versions
|
||||
tuple val(meta), path('*.blast'), optional: true, emit: blast
|
||||
tuple val(meta), path('*.xml') , optional: true, emit: xml
|
||||
tuple val(meta), path('*.txt') , optional: true, emit: txt
|
||||
tuple val(meta), path('*.daa') , optional: true, emit: daa
|
||||
tuple val(meta), path('*.sam') , optional: true, emit: sam
|
||||
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
|
||||
tuple val(meta), path('*.paf') , optional: true, emit: paf
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -23,6 +29,21 @@ process DIAMOND_BLASTX {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def columns = blast_columns ? "${blast_columns}" : ''
|
||||
// Map the requested output extension onto the numeric code passed to --outfmt.
switch ( out_ext ) {
    case "blast": outfmt = 0; break
    case "xml": outfmt = 5; break
    case "txt": outfmt = 6; break
    case "daa": outfmt = 100; break
    case "sam": outfmt = 101; break
    case "tsv": outfmt = 102; break
    case "paf": outfmt = 103; break
    default:
        // Warn BEFORE overwriting out_ext so the message names the rejected
        // value rather than the 'txt' fallback.
        log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
        outfmt = '6';
        out_ext = 'txt';
        break
}
|
||||
"""
|
||||
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
|
||||
|
||||
|
@ -31,8 +52,9 @@ process DIAMOND_BLASTX {
|
|||
--threads $task.cpus \\
|
||||
--db \$DB \\
|
||||
--query $fasta \\
|
||||
--outfmt ${outfmt} ${columns} \\
|
||||
$args \\
|
||||
--out ${prefix}.txt
|
||||
--out ${prefix}.${out_ext}
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -28,12 +28,44 @@ input:
|
|||
type: directory
|
||||
description: Directory containing the nucleotide blast database
|
||||
pattern: "*"
|
||||
- out_ext:
|
||||
type: string
|
||||
description: |
|
||||
Specify the type of output file to be generated. `blast` corresponds to
|
||||
BLAST pairwise format. `xml` corresponds to BLAST xml format.
|
||||
`txt` corresponds to BLAST tabular format. `tsv` corresponds to
|
||||
taxonomic classification format.
|
||||
pattern: "blast|xml|txt|daa|sam|tsv|paf"
|
||||
|
||||
output:
|
||||
- blast:
|
||||
type: file
|
||||
description: File containing blastx hits
|
||||
pattern: "*.{blast}"
|
||||
- xml:
|
||||
type: file
|
||||
description: File containing blastx hits
|
||||
pattern: "*.{xml}"
|
||||
- txt:
|
||||
type: file
|
||||
description: File containing blastx hits
|
||||
pattern: "*.{blastx.txt}"
|
||||
description: File containing hits in tabular BLAST format.
|
||||
pattern: "*.{txt}"
|
||||
- daa:
|
||||
type: file
|
||||
description: File containing hits in DAA format
|
||||
pattern: "*.{daa}"
|
||||
- sam:
|
||||
type: file
|
||||
description: File containing aligned reads in SAM format
|
||||
pattern: "*.{sam}"
|
||||
- tsv:
|
||||
type: file
|
||||
description: Tab separated file containing taxonomic classification of hits
|
||||
pattern: "*.{tsv}"
|
||||
- paf:
|
||||
type: file
|
||||
description: File containing aligned reads in pairwise mapping format
|
||||
pattern: "*.{paf}"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
|
@ -41,3 +73,4 @@ output:
|
|||
|
||||
authors:
|
||||
- "@spficklin"
|
||||
- "@jfy133"
|
||||
|
|
|
@ -2,12 +2,10 @@ process DIAMOND_MAKEDB {
|
|||
tag "$fasta"
|
||||
label 'process_medium'
|
||||
|
||||
// Dimaond is limited to v2.0.9 because there is not a
|
||||
// singularity version higher than this at the current time.
|
||||
conda (params.enable_conda ? 'bioconda::diamond=2.0.9' : null)
|
||||
conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
|
||||
'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
|
||||
'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
|
||||
'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"
|
||||
|
||||
input:
|
||||
path fasta
|
||||
|
|
|
@ -24,44 +24,23 @@ process DRAGMAP_ALIGN {
|
|||
def args = task.ext.args ?: ''
|
||||
def args2 = task.ext.args2 ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def read_group = meta.read_group ? "--RGSM ${meta.read_group}" : ""
|
||||
def reads_command = meta.single_end ? "-1 $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
|
||||
def samtools_command = sort_bam ? 'sort' : 'view'
|
||||
if (meta.single_end) {
|
||||
"""
|
||||
dragen-os \\
|
||||
-r $hashmap \\
|
||||
$args \\
|
||||
$read_group \\
|
||||
--num-threads $task.cpus \\
|
||||
-1 $reads \\
|
||||
2> ${prefix}.dragmap.log \\
|
||||
| samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
dragmap: \$(echo \$(dragen-os --version 2>&1))
|
||||
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
|
||||
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
|
||||
END_VERSIONS
|
||||
"""
|
||||
} else {
|
||||
"""
|
||||
dragen-os \\
|
||||
-r $hashmap \\
|
||||
$args \\
|
||||
$read_group \\
|
||||
--num-threads $task.cpus \\
|
||||
-1 ${reads[0]} \\
|
||||
-2 ${reads[1]} \\
|
||||
2> ${prefix}.dragmap.log \\
|
||||
| samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
|
||||
"""
|
||||
dragen-os \\
|
||||
-r $hashmap \\
|
||||
$args \\
|
||||
--num-threads $task.cpus \\
|
||||
$reads_command \\
|
||||
2> ${prefix}.dragmap.log \\
|
||||
| samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
dragmap: \$(echo \$(dragen-os --version 2>&1))
|
||||
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
|
||||
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
dragmap: \$(echo \$(dragen-os --version 2>&1))
|
||||
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
|
||||
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
||||
|
|
89
modules/elprep/filter/main.nf
Normal file
89
modules/elprep/filter/main.nf
Normal file
|
@ -0,0 +1,89 @@
|
|||
// Runs `elprep filter` on a SAM/BAM file and writes the result to output/.
// Every optional input or switch contributes one command-line fragment that is
// empty when the corresponding value is falsy.
process ELPREP_FILTER {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
        'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"

    input:
    tuple val(meta), path(bam)
    val(run_haplotypecaller)
    val(run_bqsr)
    path(reference_sequences)
    path(filter_regions_bed)
    path(reference_elfasta)
    path(known_sites_elsites)
    path(target_regions_bed)
    path(intermediate_bqsr_tables)
    val(bqsr_tables_only)
    val(get_activity_profile)
    val(get_assembly_regions)

    output:
    tuple val(meta), path("output/**.{bam,sam}")      ,emit: bam
    tuple val(meta), path("*.metrics.txt")            ,optional: true, emit: metrics
    tuple val(meta), path("*.recall")                 ,optional: true, emit: recall
    tuple val(meta), path("*.vcf.gz")                 ,optional: true, emit: gvcf
    tuple val(meta), path("*.table")                  ,optional: true, emit: table
    tuple val(meta), path("*.activity_profile.igv")   ,optional: true, emit: activity_profile
    tuple val(meta), path("*.assembly_regions.igv")   ,optional: true, emit: assembly_regions
    path "versions.yml"                               ,emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args       = task.ext.args ?: ''
    def prefix     = task.ext.prefix ?: "${meta.id}"
    // The output extension is "sam" only when ext.args asks for it explicitly.
    def out_format = args.contains("--output-type sam") ? "sam" : "bam"

    // Filtering: header replacement and region restriction.
    def replace_header_opt = reference_sequences ? " --replace-reference-sequences ${reference_sequences}" : ""
    def region_filter_opt  = filter_regions_bed ? " --filter-non-overlapping-reads ${filter_regions_bed}" : ""

    // Duplicate marking: the metrics file is requested only when ext.args
    // already contains --mark-duplicates.
    def dup_metrics_opt    = args.contains("--mark-duplicates") ? " --mark-optical-duplicates ${prefix}.metrics.txt": ""

    // Variant calling and the reference resources it shares with BQSR.
    def haplotypecaller_opt = run_haplotypecaller ? " --haplotypecaller ${prefix}.g.vcf.gz": ""
    def reference_opt       = reference_elfasta ? " --reference ${reference_elfasta}": ""
    def known_sites_opt     = known_sites_elsites ? " --known-sites ${known_sites_elsites}": ""
    def target_regions_opt  = target_regions_bed ? " --target-regions ${target_regions_bed}": ""

    // Base quality score recalibration.
    def bqsr_opt            = run_bqsr ? " --bqsr ${prefix}.recall": ""
    def bqsr_tables_opt     = bqsr_tables_only ? " --bqsr-tables-only ${prefix}.table": ""
    // Intermediate tables are staged into the work directory, hence ".".
    def bqsr_apply_opt      = intermediate_bqsr_tables ? " --bqsr-apply .": ""

    // Optional IGV tracks.
    def activity_opt        = get_activity_profile ? " --activity-profile ${prefix}.activity_profile.igv": ""
    def assembly_opt        = get_assembly_regions ? " --assembly-regions ${prefix}.assembly_regions.igv": ""

    """
    elprep filter ${bam} output/${prefix}.${out_format} \\
        ${replace_header_opt} \\
        ${region_filter_opt} \\
        ${dup_metrics_opt} \\
        ${haplotypecaller_opt} \\
        ${reference_opt} \\
        ${known_sites_opt} \\
        ${target_regions_opt} \\
        ${bqsr_opt} \\
        ${bqsr_tables_opt} \\
        ${bqsr_apply_opt} \\
        ${activity_opt} \\
        ${assembly_opt} \\
        --nr-of-threads ${task.cpus} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
    END_VERSIONS
    """
}
|
106
modules/elprep/filter/meta.yml
Normal file
106
modules/elprep/filter/meta.yml
Normal file
|
@ -0,0 +1,106 @@
|
|||
name: "elprep_filter"
|
||||
description: "Filter, sort and markdup sam/bam files, with optional BQSR and variant calling."
|
||||
keywords:
|
||||
- sort
|
||||
- bam
|
||||
- sam
|
||||
- filter
|
||||
- variant calling
|
||||
tools:
|
||||
- "elprep":
|
||||
description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
|
||||
homepage: "https://github.com/ExaScience/elprep"
|
||||
documentation: "https://github.com/ExaScience/elprep"
|
||||
tool_dev_url: "https://github.com/ExaScience/elprep"
|
||||
doi: "10.1371/journal.pone.0244471"
|
||||
licence: ["AGPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: Input SAM/BAM file
|
||||
pattern: "*.{bam,sam}"
|
||||
- run_haplotypecaller:
|
||||
type: boolean
|
||||
description: Run variant calling on the input files. Needed to generate gvcf output.
|
||||
- run_bqsr:
|
||||
type: boolean
|
||||
description: Run BQSR on the input files. Needed to generate recall metrics.
|
||||
- reference_sequences:
|
||||
type: file
|
||||
description: Optional SAM header to replace existing header.
|
||||
pattern: "*.sam"
|
||||
- filter_regions_bed:
|
||||
type: file
|
||||
description: Optional BED file containing regions to filter.
|
||||
pattern: "*.bed"
|
||||
- reference_elfasta:
|
||||
type: file
|
||||
description: Elfasta file, required for BQSR and variant calling.
|
||||
pattern: "*.elfasta"
|
||||
- known_sites:
|
||||
type: file
|
||||
description: Optional elsites file containing known SNPs for BQSR.
|
||||
pattern: "*.elsites"
|
||||
- target_regions_bed:
|
||||
type: file
|
||||
description: Optional BED file containing target regions for BQSR and variant calling.
|
||||
pattern: "*.bed"
|
||||
- intermediate_bqsr_tables:
|
||||
type: file
|
||||
description: Optional list of BQSR tables, used when parsing files created by `elprep split`
|
||||
pattern: "*.table"
|
||||
- bqsr_tables_only:
|
||||
type: boolean
|
||||
description: Write intermediate BQSR tables, used when parsing files created by `elprep split`.
|
||||
- get_activity_profile:
|
||||
type: boolean
|
||||
description: Get the activity profile calculated by the haplotypecaller to the given file in IGV format.
|
||||
- get_assembly_regions:
|
||||
type: boolean
|
||||
description: Get the assembly regions calculated by haplotypecaller to the specified file in IGV format.
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- bam:
|
||||
type: file
|
||||
description: Sorted, markdup, optionally BQSR BAM/SAM file
|
||||
pattern: "*.{bam,sam}"
|
||||
- metrics:
|
||||
type: file
|
||||
description: Optional duplicate metrics file generated by elprep
|
||||
pattern: "*.{metrics.txt}"
|
||||
- recall:
|
||||
type: file
|
||||
description: Optional recall metrics file generated by elprep
|
||||
pattern: "*.{recall}"
|
||||
- gvcf:
|
||||
type: file
|
||||
description: Optional GVCF output file
|
||||
pattern: "*.{vcf.gz}"
|
||||
- table:
|
||||
type: file
|
||||
description: Optional intermediate BQSR table output file
|
||||
pattern: "*.{table}"
|
||||
- activity_profile:
|
||||
type: file
|
||||
description: Optional activity profile output file
|
||||
pattern: "*.{activity_profile.igv}"
|
||||
- assembly_regions:
|
||||
type: file
|
||||
description: Optional assembly regions output file
|
||||
pattern: "*.{assembly_regions.igv}"
|
||||
authors:
|
||||
- "@matthdsm"
|
43
modules/elprep/merge/main.nf
Normal file
43
modules/elprep/merge/main.nf
Normal file
|
@ -0,0 +1,43 @@
|
|||
process ELPREP_MERGE {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
|
||||
'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(bam)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("output/**.{bam,sam}") , emit: bam
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def suffix = args.contains("--output-type sam") ? "sam" : "bam"
|
||||
def single_end = meta.single_end ? " --single-end" : ""
|
||||
|
||||
"""
|
||||
# create directory and move all input so elprep can find and merge them
|
||||
mkdir input
|
||||
mv ${bam} input/
|
||||
|
||||
elprep merge \\
|
||||
input/ \\
|
||||
output/${prefix}.${suffix} \\
|
||||
$args \\
|
||||
${single_end} \\
|
||||
--nr-of-threads $task.cpus
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
44
modules/elprep/merge/meta.yml
Normal file
44
modules/elprep/merge/meta.yml
Normal file
|
@ -0,0 +1,44 @@
|
|||
name: "elprep_merge"
|
||||
description: Merge split bam/sam chunks in one file
|
||||
keywords:
|
||||
- bam
|
||||
- sam
|
||||
- merge
|
||||
tools:
|
||||
- "elprep":
|
||||
description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
|
||||
homepage: "https://github.com/ExaScience/elprep"
|
||||
documentation: "https://github.com/ExaScience/elprep"
|
||||
tool_dev_url: "https://github.com/ExaScience/elprep"
|
||||
doi: "10.1371/journal.pone.0244471"
|
||||
licence: ["AGPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: List of BAM/SAM chunks to merge
|
||||
pattern: "*.{bam,sam}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
#
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- bam:
|
||||
type: file
|
||||
description: Merged BAM/SAM file
|
||||
pattern: "*.{bam,sam}"
|
||||
|
||||
authors:
|
||||
- "@matthdsm"
|
45
modules/elprep/split/main.nf
Normal file
45
modules/elprep/split/main.nf
Normal file
|
@ -0,0 +1,45 @@
|
|||
process ELPREP_SPLIT {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
|
||||
'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(bam)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("output/**.{bam,sam}"), emit: bam
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def single_end = meta.single_end ? " --single-end": ""
|
||||
|
||||
"""
|
||||
# create directory and move all input so elprep can find and merge them before splitting
|
||||
mkdir input
|
||||
mv ${bam} input/
|
||||
|
||||
mkdir ${prefix}
|
||||
|
||||
elprep split \\
|
||||
input \\
|
||||
output/ \\
|
||||
$args \\
|
||||
$single_end \\
|
||||
--nr-of-threads $task.cpus \\
|
||||
--output-prefix $prefix
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
43
modules/elprep/split/meta.yml
Normal file
43
modules/elprep/split/meta.yml
Normal file
|
@ -0,0 +1,43 @@
|
|||
name: "elprep_split"
|
||||
description: Split bam file into manageable chunks
|
||||
keywords:
|
||||
- bam
|
||||
- split by chromosome
|
||||
tools:
|
||||
- "elprep":
|
||||
description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
|
||||
homepage: "https://github.com/ExaScience/elprep"
|
||||
documentation: "https://github.com/ExaScience/elprep"
|
||||
tool_dev_url: "https://github.com/ExaScience/elprep"
|
||||
doi: "10.1371/journal.pone.0244471"
|
||||
licence: ["AGPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: List of BAM/SAM files
|
||||
pattern: "*.{bam,sam}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
#
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- bam:
|
||||
type: file
|
||||
description: List of split BAM/SAM files
|
||||
pattern: "*.{bam,sam}"
|
||||
|
||||
authors:
|
||||
- "@matthdsm"
|
41
modules/gamma/main.nf
Normal file
41
modules/gamma/main.nf
Normal file
|
@ -0,0 +1,41 @@
|
|||
def VERSION = '2.1' // Version information not provided by tool on CLI
|
||||
|
||||
process GAMMA {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::gamma=2.1" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/gamma%3A2.1--hdfd78af_0':
|
||||
'quay.io/biocontainers/gamma:2.1--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(fasta)
|
||||
path(db)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.gamma") , emit: gamma
|
||||
tuple val(meta), path("*.psl") , emit: psl
|
||||
tuple val(meta), path("*.gff") , optional:true , emit: gff
|
||||
tuple val(meta), path("*.fasta"), optional:true , emit: fasta
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
"""
|
||||
GAMMA.py \\
|
||||
$args \\
|
||||
$fasta \\
|
||||
$db \\
|
||||
$prefix
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
gamma: $VERSION
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
63
modules/gamma/meta.yml
Normal file
63
modules/gamma/meta.yml
Normal file
|
@ -0,0 +1,63 @@
|
|||
name: "gamma"
|
||||
description: Gene Allele Mutation Microbial Assessment
|
||||
keywords:
|
||||
- gamma
|
||||
- gene-calling
|
||||
tools:
|
||||
- "gamma":
|
||||
description: "Tool for Gene Allele Mutation Microbial Assessment"
|
||||
homepage: "https://github.com/rastanton/GAMMA"
|
||||
documentation: "https://github.com/rastanton/GAMMA"
|
||||
tool_dev_url: "https://github.com/rastanton/GAMMA"
|
||||
doi: "10.1093/bioinformatics/btab607"
|
||||
licence: ["Apache License 2.0"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- fasta:
|
||||
type: file
|
||||
description: FASTA file
|
||||
pattern: "*.{fa,fasta}"
|
||||
- db:
|
||||
type: file
|
||||
description: Database in FASTA format
|
||||
pattern: "*.{fa,fasta}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
|
||||
- gamma:
|
||||
type: file
|
||||
description: GAMMA file with annotated gene matches
|
||||
pattern: "*.{gamma}"
|
||||
|
||||
- psl:
|
||||
type: file
|
||||
description: PSL file with all gene matches found
|
||||
pattern: "*.{psl}"
|
||||
|
||||
- gff:
|
||||
type: file
|
||||
description: GFF file
|
||||
pattern: "*.{gff}"
|
||||
|
||||
- fasta:
|
||||
type: file
|
||||
description: multifasta file of the gene matches
|
||||
pattern: "*.{fasta}"
|
||||
|
||||
authors:
|
||||
- "@sateeshperi"
|
||||
- "@rastanton"
|
|
@ -14,9 +14,9 @@ process GATK4_APPLYBQSR {
|
|||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.bam"), emit: bam, optional: true
|
||||
tuple val(meta), path("*.bam") , emit: bam, optional: true
|
||||
tuple val(meta), path("*.cram"), emit: cram, optional: true
|
||||
path "versions.yml" , emit: versions
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -24,8 +24,7 @@ process GATK4_APPLYBQSR {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def interval = intervals ? "-L ${intervals}" : ""
|
||||
def file_type = input.getExtension()
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -35,12 +34,12 @@ process GATK4_APPLYBQSR {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\
|
||||
-R $fasta \\
|
||||
-I $input \\
|
||||
--input $input \\
|
||||
--output ${prefix}.${input.getExtension()} \\
|
||||
--reference $fasta \\
|
||||
--bqsr-recal-file $bqsr_table \\
|
||||
$interval \\
|
||||
$interval_command \\
|
||||
--tmp-dir . \\
|
||||
-O ${prefix}.${file_type} \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -61,6 +61,10 @@ output:
|
|||
type: file
|
||||
description: Recalibrated BAM file
|
||||
pattern: "*.{bam}"
|
||||
- cram:
|
||||
type: file
|
||||
description: Recalibrated CRAM file
|
||||
pattern: "*.{cram}"
|
||||
|
||||
authors:
|
||||
- "@yocra3"
|
||||
|
|
51
modules/gatk4/applybqsrspark/main.nf
Normal file
51
modules/gatk4/applybqsrspark/main.nf
Normal file
|
@ -0,0 +1,51 @@
|
|||
process GATK4_APPLYBQSR_SPARK {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
|
||||
'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.bam") , emit: bam, optional: true
|
||||
tuple val(meta), path("*.cram"), emit: cram, optional: true
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
} else {
|
||||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" ApplyBQSRSpark \\
|
||||
--input $input \\
|
||||
--output ${prefix}.${input.getExtension()} \\
|
||||
--reference $fasta \\
|
||||
--bqsr-recal-file $bqsr_table \\
|
||||
$interval_command \\
|
||||
--spark-master local[${task.cpus}] \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
72
modules/gatk4/applybqsrspark/meta.yml
Normal file
72
modules/gatk4/applybqsrspark/meta.yml
Normal file
|
@ -0,0 +1,72 @@
|
|||
name: gatk4_applybqsr_spark
|
||||
description: Apply base quality score recalibration (BQSR) to a bam file
|
||||
keywords:
|
||||
- bqsr
|
||||
- bam
|
||||
tools:
|
||||
- gatk4:
|
||||
description: |
|
||||
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
|
||||
with a primary focus on variant discovery and genotyping. Its powerful processing engine
|
||||
and high-performance computing features make it capable of taking on projects of any size.
|
||||
homepage: https://gatk.broadinstitute.org/hc/en-us
|
||||
documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
|
||||
doi: 10.1158/1538-7445.AM2017-3590
|
||||
licence: ["Apache-2.0"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- input:
|
||||
type: file
|
||||
description: BAM/CRAM file from alignment
|
||||
pattern: "*.{bam,cram}"
|
||||
- input_index:
|
||||
type: file
|
||||
description: BAI/CRAI file from alignment
|
||||
pattern: "*.{bai,crai}"
|
||||
- bqsr_table:
|
||||
type: file
|
||||
description: Recalibration table from gatk4_baserecalibrator
|
||||
- intervals:
|
||||
type: file
|
||||
description: Bed file with the genomic regions included in the library (optional)
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fai:
|
||||
type: file
|
||||
description: Index of reference fasta file
|
||||
pattern: "*.fasta.fai"
|
||||
- dict:
|
||||
type: file
|
||||
description: GATK sequence dictionary
|
||||
pattern: "*.dict"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- bam:
|
||||
type: file
|
||||
description: Recalibrated BAM file
|
||||
pattern: "*.{bam}"
|
||||
- cram:
|
||||
type: file
|
||||
description: Recalibrated CRAM file
|
||||
pattern: "*.{cram}"
|
||||
|
||||
authors:
|
||||
- "@yocra3"
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
|
@ -8,15 +8,15 @@ process GATK4_APPLYVQSR {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(vcf), path(tbi), path(recal), path(recalidx), path(tranches)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
tuple val(meta), path(vcf), path(vcf_tbi), path(recal), path(recal_index), path(tranches)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.vcf.gz") , emit: vcf
|
||||
tuple val(meta), path("*.tbi") , emit: tbi
|
||||
path "versions.yml" , emit: versions
|
||||
tuple val(meta), path("*.vcf.gz"), emit: vcf
|
||||
tuple val(meta), path("*.tbi") , emit: tbi
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -24,7 +24,7 @@ process GATK4_APPLYVQSR {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
refCommand = fasta ? "-R ${fasta} " : ''
|
||||
def reference_command = fasta ? "--reference $fasta" : ''
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -34,11 +34,12 @@ process GATK4_APPLYVQSR {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" ApplyVQSR \\
|
||||
${refCommand} \\
|
||||
-V ${vcf} \\
|
||||
-O ${prefix}.vcf.gz \\
|
||||
--variant ${vcf} \\
|
||||
--output ${prefix}.vcf.gz \\
|
||||
$reference_command \\
|
||||
--tranches-file $tranches \\
|
||||
--recal-file $recal \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -29,20 +29,20 @@ input:
|
|||
type: file
|
||||
description: VCF file to be recalibrated, this should be the same file as used for the first stage VariantRecalibrator.
|
||||
pattern: "*.vcf"
|
||||
- tbi:
|
||||
- vcf_tbi:
|
||||
type: file
|
||||
description: Tbi index for the input vcf file.
|
||||
description: tabix index for the input vcf file.
|
||||
pattern: "*.vcf.tbi"
|
||||
- recal:
|
||||
type: file
|
||||
description: Recalibration file produced when the input vcf was run through VariantRecalibrator in stage 1.
|
||||
pattern: "*.recal"
|
||||
- recalidx:
|
||||
- recal_index:
|
||||
type: file
|
||||
description: Index file for the recalibration file.
|
||||
pattern: "*.recal.idx"
|
||||
- tranches:
|
||||
type: boolean
|
||||
type: file
|
||||
description: Tranches file produced when the input vcf was run through VariantRecalibrator in stage 1.
|
||||
pattern: "*.tranches"
|
||||
- fasta:
|
||||
|
|
|
@ -9,15 +9,15 @@ process GATK4_BASERECALIBRATOR {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(input), path(input_index), path(intervals)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path knownSites
|
||||
path knownSites_tbi
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path known_sites
|
||||
path known_sites_tbi
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.table"), emit: table
|
||||
path "versions.yml" , emit: versions
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -25,8 +25,8 @@ process GATK4_BASERECALIBRATOR {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def intervalsCommand = intervals ? "-L ${intervals}" : ""
|
||||
def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ')
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
def sites_command = known_sites.collect{"--known-sites $it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -34,16 +34,15 @@ process GATK4_BASERECALIBRATOR {
|
|||
} else {
|
||||
avail_mem = task.memory.giga
|
||||
}
|
||||
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \
|
||||
-R $fasta \
|
||||
-I $input \
|
||||
$sitesCommand \
|
||||
$intervalsCommand \
|
||||
--tmp-dir . \
|
||||
$args \
|
||||
-O ${prefix}.table
|
||||
gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \\
|
||||
--input $input \\
|
||||
--output ${prefix}.table \\
|
||||
--reference $fasta \\
|
||||
$interval_command \\
|
||||
$sites_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -42,9 +42,14 @@ input:
|
|||
type: file
|
||||
description: GATK sequence dictionary
|
||||
pattern: "*.dict"
|
||||
- knownSites:
|
||||
- known_sites:
|
||||
type: file
|
||||
description: Bed file with the genomic regions included in the library (optional)
|
||||
description: VCF files with known sites for indels / snps (optional)
|
||||
pattern: "*.vcf.gz"
|
||||
- known_sites_tbi:
|
||||
type: file
|
||||
description: Tabix index of the known_sites (optional)
|
||||
pattern: "*.vcf.gz.tbi"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
|
@ -64,3 +69,4 @@ output:
|
|||
authors:
|
||||
- "@yocra3"
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
||||
|
|
53
modules/gatk4/baserecalibratorspark/main.nf
Normal file
53
modules/gatk4/baserecalibratorspark/main.nf
Normal file
|
@ -0,0 +1,53 @@
|
|||
process GATK4_BASERECALIBRATOR_SPARK {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
|
||||
'broadinstitute/gatk:4.2.3.0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(input), path(input_index), path(intervals)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path known_sites
|
||||
path known_sites_tbi
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.table"), emit: table
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
def sites_command = known_sites.collect{"--known-sites $it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK BaseRecalibratorSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
} else {
|
||||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" BaseRecalibratorSpark \\
|
||||
--input $input \\
|
||||
--output ${prefix}.table \\
|
||||
--reference $fasta \\
|
||||
$interval_command \\
|
||||
$sites_command \\
|
||||
--spark-master local[${task.cpus}] \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
72
modules/gatk4/baserecalibratorspark/meta.yml
Normal file
72
modules/gatk4/baserecalibratorspark/meta.yml
Normal file
|
@ -0,0 +1,72 @@
|
|||
name: gatk4_baserecalibrator_spark
|
||||
description: Generate recalibration table for Base Quality Score Recalibration (BQSR)
|
||||
keywords:
|
||||
- sort
|
||||
tools:
|
||||
- gatk4:
|
||||
description: |
|
||||
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
|
||||
with a primary focus on variant discovery and genotyping. Its powerful processing engine
|
||||
and high-performance computing features make it capable of taking on projects of any size.
|
||||
homepage: https://gatk.broadinstitute.org/hc/en-us
|
||||
documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
|
||||
doi: 10.1158/1538-7445.AM2017-3590
|
||||
licence: ["Apache-2.0"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- input:
|
||||
type: file
|
||||
description: BAM/CRAM file from alignment
|
||||
pattern: "*.{bam,cram}"
|
||||
- input_index:
|
||||
type: file
|
||||
description: BAI/CRAI file from alignment
|
||||
pattern: "*.{bai,crai}"
|
||||
- intervals:
|
||||
type: file
|
||||
description: Bed file with the genomic regions included in the library (optional)
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fai:
|
||||
type: file
|
||||
description: Index of reference fasta file
|
||||
pattern: "*.fasta.fai"
|
||||
- dict:
|
||||
type: file
|
||||
description: GATK sequence dictionary
|
||||
pattern: "*.dict"
|
||||
- known_sites:
|
||||
type: file
|
||||
description: VCF files with known sites for indels / snps (optional)
|
||||
pattern: "*.vcf.gz"
|
||||
- known_sites_tbi:
|
||||
type: file
|
||||
description: Tabix index of the known_sites (optional)
|
||||
pattern: "*.vcf.gz.tbi"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- table:
|
||||
type: file
|
||||
description: Recalibration table from BaseRecalibrator
|
||||
pattern: "*.{table}"
|
||||
|
||||
authors:
|
||||
- "@yocra3"
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
|
@ -9,7 +9,7 @@ process GATK4_BEDTOINTERVALLIST {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(bed)
|
||||
path sequence_dict
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.interval_list'), emit: interval_list
|
||||
|
@ -21,6 +21,7 @@ process GATK4_BEDTOINTERVALLIST {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -29,9 +30,10 @@ process GATK4_BEDTOINTERVALLIST {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\
|
||||
-I $bed \\
|
||||
-SD $sequence_dict \\
|
||||
-O ${prefix}.interval_list \\
|
||||
--INPUT $bed \\
|
||||
--OUTPUT ${prefix}.interval_list \\
|
||||
--SEQUENCE_DICTIONARY $dict \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -9,7 +9,6 @@ process GATK4_CALCULATECONTAMINATION {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(pileup), path(matched)
|
||||
val segmentout
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.contamination.table'), emit: contamination
|
||||
|
@ -22,8 +21,8 @@ process GATK4_CALCULATECONTAMINATION {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def matched_command = matched ? " -matched ${matched} " : ''
|
||||
def segment_command = segmentout ? " -segments ${prefix}.segmentation.table" : ''
|
||||
def matched_command = matched ? "--matched-normal $matched" : ''
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK CalculateContamination] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -32,10 +31,10 @@ process GATK4_CALCULATECONTAMINATION {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" CalculateContamination \\
|
||||
-I $pileup \\
|
||||
--input $pileup \\
|
||||
--output ${prefix}.contamination.table \\
|
||||
$matched_command \\
|
||||
-O ${prefix}.contamination.table \\
|
||||
$segment_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -32,9 +32,6 @@ input:
|
|||
type: file
|
||||
description: File containing the pileups summary table of a normal sample that matches with the tumor sample specified in pileup argument. This is an optional input.
|
||||
pattern: "*.pileups.table"
|
||||
- segmentout:
|
||||
type: boolean
|
||||
description: specifies whether to output the segmentation table.
|
||||
|
||||
output:
|
||||
- contamination:
|
||||
|
@ -43,7 +40,7 @@ output:
|
|||
pattern: "*.contamination.table"
|
||||
- segmentation:
|
||||
type: file
|
||||
description: optional output table containing segmentation of tumor minor allele fractions.
|
||||
description: output table containing segmentation of tumor minor allele fractions (optional)
|
||||
pattern: "*.segmentation.table"
|
||||
- versions:
|
||||
type: file
|
||||
|
@ -52,3 +49,4 @@ output:
|
|||
|
||||
authors:
|
||||
- "@GCJMackenzie"
|
||||
- "@maxulysse"
|
||||
|
|
|
@ -9,9 +9,9 @@ process GATK4_COMBINEGVCFS {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(vcf), path(vcf_idx)
|
||||
path (fasta)
|
||||
path (fasta_fai)
|
||||
path (fasta_dict)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf
|
||||
|
@ -23,21 +23,21 @@ process GATK4_COMBINEGVCFS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def avail_mem = 3
|
||||
def input_list = vcf.collect{"--variant $it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
} else {
|
||||
avail_mem = task.memory.giga
|
||||
}
|
||||
def input_files = vcf.collect{"-V ${it}"}.join(' ') // add '-V' to each vcf file
|
||||
"""
|
||||
gatk \\
|
||||
--java-options "-Xmx${avail_mem}g" \\
|
||||
CombineGVCFs \\
|
||||
-R ${fasta} \\
|
||||
-O ${prefix}.combined.g.vcf.gz \\
|
||||
${args} \\
|
||||
${input_files}
|
||||
gatk --java-options "-Xmx${avail_mem}g" CombineGVCFs \\
|
||||
$input_list \\
|
||||
--output ${prefix}.combined.g.vcf.gz \\
|
||||
--reference ${fasta} \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -19,18 +19,11 @@ tools:
|
|||
licence: ["Apache-2.0"]
|
||||
|
||||
input:
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fai:
|
||||
type: file
|
||||
description: FASTA index file
|
||||
pattern: "*.{fai}"
|
||||
- dict:
|
||||
type: file
|
||||
description: FASTA dictionary file
|
||||
pattern: "*.{dict}"
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test' ]
|
||||
- vcf:
|
||||
type: file
|
||||
description: Compressed VCF files
|
||||
|
@ -38,7 +31,19 @@ input:
|
|||
- vcf_idx:
|
||||
type: file
|
||||
description: VCF Index file
|
||||
pattern: "*.{fai}"
|
||||
pattern: "*.vcf.gz.idx"
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fai:
|
||||
type: file
|
||||
description: FASTA index file
|
||||
pattern: "*.fasta.fai"
|
||||
- dict:
|
||||
type: file
|
||||
description: FASTA dictionary file
|
||||
pattern: "*.dict"
|
||||
output:
|
||||
- gvcf:
|
||||
type: file
|
||||
|
@ -53,3 +58,4 @@ authors:
|
|||
- "@sateeshperi"
|
||||
- "@mjcipriano"
|
||||
- "@hseabolt"
|
||||
- "@maxulysse"
|
||||
|
|
|
@ -11,14 +11,15 @@ process GATK4_CREATESEQUENCEDICTIONARY {
|
|||
path fasta
|
||||
|
||||
output:
|
||||
path "*.dict" , emit: dict
|
||||
path "versions.yml" , emit: versions
|
||||
path "*.dict" , emit: dict
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
|
||||
def avail_mem = 6
|
||||
if (!task.memory) {
|
||||
log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.'
|
||||
|
@ -26,10 +27,10 @@ process GATK4_CREATESEQUENCEDICTIONARY {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" \\
|
||||
CreateSequenceDictionary \\
|
||||
gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\
|
||||
--REFERENCE $fasta \\
|
||||
--URI $fasta \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -9,9 +9,9 @@ process GATK4_CREATESOMATICPANELOFNORMALS {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(genomicsdb)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.vcf.gz"), emit: vcf
|
||||
|
@ -24,6 +24,7 @@ process GATK4_CREATESOMATICPANELOFNORMALS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -31,11 +32,11 @@ process GATK4_CREATESOMATICPANELOFNORMALS {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" \\
|
||||
CreateSomaticPanelOfNormals \\
|
||||
-R $fasta \\
|
||||
-V gendb://$genomicsdb \\
|
||||
-O ${prefix}.vcf.gz \\
|
||||
gatk --java-options "-Xmx${avail_mem}g" CreateSomaticPanelOfNormals \\
|
||||
--variant gendb://$genomicsdb \\
|
||||
--output ${prefix}.vcf.gz \\
|
||||
--reference $fasta \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -44,7 +44,7 @@ output:
|
|||
pattern: "*.vcf.gz"
|
||||
- tbi:
|
||||
type: file
|
||||
description: Index of vcf file
|
||||
description: Tabix index of vcf file
|
||||
pattern: "*vcf.gz.tbi"
|
||||
- versions:
|
||||
type: file
|
||||
|
|
|
@ -8,14 +8,14 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(cram)
|
||||
path(fasta)
|
||||
path(fai)
|
||||
path(dict)
|
||||
tuple val(meta), path(input)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.metrics'), emit: metrics
|
||||
path "versions.yml" , emit: versions
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -23,7 +23,7 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def crams = cram.collect(){ x -> "-I ".concat(x.toString()) }.join(" ")
|
||||
def input_list = input.collect(){"--INPUT $it"}.join(" ")
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -32,12 +32,12 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \
|
||||
${crams} \
|
||||
-O ${prefix}.metrics \
|
||||
--REFERENCE_SEQUENCE ${fasta} \
|
||||
--VALIDATION_STRINGENCY SILENT \
|
||||
--TMP_DIR . $args
|
||||
gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \\
|
||||
$input_list \\
|
||||
--OUTPUT ${prefix}.metrics \\
|
||||
--REFERENCE_SEQUENCE ${fasta} \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -20,7 +20,7 @@ input:
|
|||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- cram:
|
||||
- input:
|
||||
type: file
|
||||
description: BAM/CRAM/SAM file
|
||||
pattern: "*.{bam,cram,sam}"
|
||||
|
@ -54,3 +54,4 @@ output:
|
|||
|
||||
authors:
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
||||
|
|
|
@ -20,7 +20,8 @@ process GATK4_FASTQTOSAM {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def read_files = meta.single_end ? "-F1 $reads" : "-F1 ${reads[0]} -F2 ${reads[1]}"
|
||||
def reads_command = meta.single_end ? "--FASTQ $reads" : "--FASTQ ${reads[0]} --FASTQ2 ${reads[1]}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK FastqToSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -29,9 +30,10 @@ process GATK4_FASTQTOSAM {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" FastqToSam \\
|
||||
$read_files \\
|
||||
-O ${prefix}.bam \\
|
||||
-SM $prefix \\
|
||||
$reads_command \\
|
||||
--OUTPUT ${prefix}.bam \\
|
||||
--SAMPLE_NAME $prefix \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -34,14 +34,14 @@ output:
|
|||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- bam:
|
||||
type: file
|
||||
description: Converted BAM file
|
||||
pattern: "*.bam"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
|
||||
authors:
|
||||
- "@ntoda03"
|
||||
|
|
|
@ -8,10 +8,10 @@ process GATK4_FILTERMUTECTCALLS {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(vcf), path(tbi), path(stats), path(orientationbias), path(segmentation), path(contaminationfile), val(contaminationest)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
tuple val(meta), path(vcf), path(vcf_tbi), path(stats), path(orientationbias), path(segmentation), path(table), val(estimate)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.vcf.gz") , emit: vcf
|
||||
|
@ -26,20 +26,11 @@ process GATK4_FILTERMUTECTCALLS {
|
|||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def orientationbias_options = ''
|
||||
if (orientationbias) {
|
||||
orientationbias_options = '--orientation-bias-artifact-priors ' + orientationbias.join(' --orientation-bias-artifact-priors ')
|
||||
}
|
||||
def orientationbias_command = orientationbias ? orientationbias.collect{"--orientation-bias-artifact-priors $it"}.join(' ') : ''
|
||||
def segmentation_command = segmentation ? segmentation.collect{"--tumor-segmentation $it"}.join(' ') : ''
|
||||
def estimate_command = estimate ? " --contamination-estimate ${estimate} " : ''
|
||||
def table_command = table ? " --contamination-table ${table} " : ''
|
||||
|
||||
def segmentation_options = ''
|
||||
if (segmentation) {
|
||||
segmentation_options = '--tumor-segmentation ' + segmentation.join(' --tumor-segmentation ')
|
||||
}
|
||||
|
||||
def contamination_options = contaminationest ? " --contamination-estimate ${contaminationest} " : ''
|
||||
if (contaminationfile) {
|
||||
contamination_options = '--contamination-table ' + contaminationfile.join(' --contamination-table ')
|
||||
}
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK FilterMutectCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -48,12 +39,14 @@ process GATK4_FILTERMUTECTCALLS {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" FilterMutectCalls \\
|
||||
-R $fasta \\
|
||||
-V $vcf \\
|
||||
$orientationbias_options \\
|
||||
$segmentation_options \\
|
||||
$contamination_options \\
|
||||
-O ${prefix}.vcf.gz \\
|
||||
--variant $vcf \\
|
||||
--output ${prefix}.vcf.gz \\
|
||||
--reference $fasta \\
|
||||
$orientationbias_command \\
|
||||
$segmentation_command \\
|
||||
$estimate_command \\
|
||||
$table_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -26,9 +26,9 @@ input:
|
|||
type: file
|
||||
description: compressed vcf file of mutect2calls
|
||||
pattern: "*.vcf.gz"
|
||||
- tbi:
|
||||
- vcf_tbi:
|
||||
type: file
|
||||
description: Index of vcf file
|
||||
description: Tabix index of vcf file
|
||||
pattern: "*vcf.gz.tbi"
|
||||
- stats:
|
||||
type: file
|
||||
|
@ -42,13 +42,13 @@ input:
|
|||
type: list
|
||||
description: tables containing segmentation information for input vcf. Optional input.
|
||||
pattern: "*.segmentation.table"
|
||||
- contaminationfile:
|
||||
- table:
|
||||
type: list
|
||||
description: table(s) containing contamination contamination data for input vcf. Optional input, takes priority over contaminationest.
|
||||
description: table(s) containing contamination data for input vcf. Optional input, takes priority over estimate.
|
||||
pattern: "*.contamination.table"
|
||||
- contaminationest:
|
||||
- estimate:
|
||||
type: val
|
||||
description: estimation of contamination value as a double. Optional input, will only be used if contaminationfile is not specified.
|
||||
description: estimation of contamination value as a double. Optional input, will only be used if table is not specified.
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
|
@ -82,3 +82,4 @@ output:
|
|||
|
||||
authors:
|
||||
- "@GCJMackenzie"
|
||||
- "@maxulysse"
|
||||
|
|
|
@ -8,7 +8,7 @@ process GATK4_GATHERBQSRREPORTS {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(recal_table)
|
||||
tuple val(meta), path(table)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.table"), emit: table
|
||||
|
@ -20,7 +20,7 @@ process GATK4_GATHERBQSRREPORTS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input = recal_table.collect{"-I ${it}"}.join(' ')
|
||||
def input_list = table.collect{"--input $it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -29,12 +29,11 @@ process GATK4_GATHERBQSRREPORTS {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" \\
|
||||
GatherBQSRReports \
|
||||
${input} \
|
||||
--tmp-dir . \
|
||||
$args \
|
||||
--output ${prefix}.table
|
||||
gatk --java-options "-Xmx${avail_mem}g" GatherBQSRReports \\
|
||||
$input_list \\
|
||||
--output ${prefix}.table \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -19,7 +19,7 @@ input:
|
|||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- recal_table:
|
||||
- table:
|
||||
type: file
|
||||
description: File(s) containing BQSR table(s)
|
||||
pattern: "*.table"
|
||||
|
@ -30,14 +30,14 @@ output:
|
|||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- table:
|
||||
type: file
|
||||
description: File containing joined BQSR table
|
||||
pattern: "*.table"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- recal_table:
|
||||
type: file
|
||||
description: File containing joined BQSR table
|
||||
pattern: "*.table"
|
||||
|
||||
authors:
|
||||
- "@FriederikeHanssen"
|
||||
|
|
|
@ -10,11 +10,11 @@ process GATK4_GATHERPILEUPSUMMARIES {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(pileup)
|
||||
path dict
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.pileupsummaries.table"), emit: table
|
||||
path "versions.yml" , emit: versions
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -22,7 +22,7 @@ process GATK4_GATHERPILEUPSUMMARIES {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input = pileup.collect{ "-I ${it} " }.join(' ')
|
||||
def input_list = pileup.collect{ "--I $it" }.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -31,11 +31,12 @@ process GATK4_GATHERPILEUPSUMMARIES {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" \
|
||||
GatherPileupSummaries \
|
||||
--sequence-dictionary ${dict} \
|
||||
${input} \
|
||||
-O ${prefix}.pileupsummaries.table
|
||||
gatk --java-options "-Xmx${avail_mem}g" GatherPileupSummaries \\
|
||||
$input_list \\
|
||||
--O ${prefix}.pileupsummaries.table \\
|
||||
--sequence-dictionary $dict \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -28,14 +28,15 @@ output:
|
|||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- table:
|
||||
type: file
|
||||
description: pileup summaries table file
|
||||
pattern: "*.pileupsummaries.table"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- table:
|
||||
type: file
|
||||
description: Pileup file
|
||||
pattern: "*.pileups.table"
|
||||
|
||||
authors:
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
||||
|
|
|
@ -8,13 +8,13 @@ process GATK4_GENOMICSDBIMPORT {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(vcf), path(tbi), path(intervalfile), val(intervalval), path(wspace)
|
||||
val run_intlist
|
||||
val run_updatewspace
|
||||
val input_map
|
||||
tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace)
|
||||
val run_intlist
|
||||
val run_updatewspace
|
||||
val input_map
|
||||
|
||||
output:
|
||||
tuple val(meta), path("${prefix}") , optional:true, emit: genomicsdb
|
||||
tuple val(meta), path("$prefix") , optional:true, emit: genomicsdb
|
||||
tuple val(meta), path("$updated_db") , optional:true, emit: updatedb
|
||||
tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist
|
||||
path "versions.yml" , emit: versions
|
||||
|
@ -27,22 +27,22 @@ process GATK4_GENOMICSDBIMPORT {
|
|||
prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
// settings for running default create gendb mode
|
||||
inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V ')}"
|
||||
dir_command = "--genomicsdb-workspace-path ${prefix}"
|
||||
intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} "
|
||||
input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect(){"--variant $it"}.join(' ')
|
||||
|
||||
genomicsdb_command = "--genomicsdb-workspace-path ${prefix}"
|
||||
interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}"
|
||||
|
||||
// settings changed for running get intervals list mode if run_intlist is true
|
||||
if (run_intlist) {
|
||||
inputs_command = ''
|
||||
dir_command = "--genomicsdb-update-workspace-path ${wspace}"
|
||||
intervals_command = "--output-interval-list-to-file ${prefix}.interval_list"
|
||||
genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}"
|
||||
interval_command = "--output-interval-list-to-file ${prefix}.interval_list"
|
||||
}
|
||||
|
||||
// settings changed for running update gendb mode. inputs_command same as default, update_db forces module to emit the updated gendb
|
||||
// settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb
|
||||
if (run_updatewspace) {
|
||||
dir_command = "--genomicsdb-update-workspace-path ${wspace}"
|
||||
intervals_command = ''
|
||||
updated_db = wspace.toString()
|
||||
genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}"
|
||||
interval_command = ''
|
||||
updated_db = "${wspace}"
|
||||
}
|
||||
|
||||
def avail_mem = 3
|
||||
|
@ -53,9 +53,10 @@ process GATK4_GENOMICSDBIMPORT {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" GenomicsDBImport \\
|
||||
$inputs_command \\
|
||||
$dir_command \\
|
||||
$intervals_command \\
|
||||
$input_command \\
|
||||
$genomicsdb_command \\
|
||||
$interval_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -10,10 +10,10 @@ process GATK4_GENOTYPEGVCFS {
|
|||
input:
|
||||
tuple val(meta), path(gvcf), path(gvcf_index), path(intervals), path(intervals_index)
|
||||
path fasta
|
||||
path fasta_index
|
||||
path fasta_dict
|
||||
path fai
|
||||
path dict
|
||||
path dbsnp
|
||||
path dbsnp_index
|
||||
path dbsnp_tbi
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.vcf.gz"), emit: vcf
|
||||
|
@ -26,9 +26,10 @@ process GATK4_GENOTYPEGVCFS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def dbsnp_options = dbsnp ? "-D ${dbsnp}" : ""
|
||||
def interval_options = intervals ? "-L ${intervals}" : ""
|
||||
def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf"
|
||||
def gvcf_command = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf"
|
||||
def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -36,14 +37,14 @@ process GATK4_GENOTYPEGVCFS {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" \\
|
||||
GenotypeGVCFs \\
|
||||
$args \\
|
||||
$interval_options \\
|
||||
$dbsnp_options \\
|
||||
-R $fasta \\
|
||||
-V $gvcf_options \\
|
||||
-O ${prefix}.vcf.gz
|
||||
gatk --java-options "-Xmx${avail_mem}g" GenotypeGVCFs \\
|
||||
--variant $gvcf_command \\
|
||||
--output ${prefix}.vcf.gz \\
|
||||
--reference $fasta \\
|
||||
$interval_command \\
|
||||
$dbsnp_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -21,10 +21,15 @@ input:
|
|||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- gvcf:
|
||||
type: tuple of files
|
||||
type: file
|
||||
description: |
|
||||
Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty)
|
||||
pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"]
|
||||
gVCF(.gz) file or to a GenomicsDB
|
||||
pattern: "*.{vcf,vcf.gz}"
|
||||
- gvcf_index:
|
||||
type: file
|
||||
description: |
|
||||
index of gvcf file, or empty when providing GenomicsDB
|
||||
pattern: "*.{idx,tbi}"
|
||||
- intervals:
|
||||
type: file
|
||||
description: Interval file with the genomic regions included in the library (optional)
|
||||
|
@ -35,11 +40,11 @@ input:
|
|||
type: file
|
||||
description: Reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fasta_index:
|
||||
- fai:
|
||||
type: file
|
||||
description: Reference fasta index file
|
||||
pattern: "*.fai"
|
||||
- fasta_dict:
|
||||
- dict:
|
||||
type: file
|
||||
description: Reference fasta sequence dict file
|
||||
pattern: "*.dict"
|
||||
|
@ -47,8 +52,8 @@ input:
|
|||
type: file
|
||||
description: dbSNP VCF file
|
||||
pattern: "*.vcf.gz"
|
||||
- dbsnp_index:
|
||||
type: tuple of files
|
||||
- dbsnp_tbi:
|
||||
type: file
|
||||
description: dbSNP VCF index file
|
||||
pattern: "*.tbi"
|
||||
|
||||
|
@ -73,3 +78,4 @@ output:
|
|||
|
||||
authors:
|
||||
- "@santiagorevale"
|
||||
- "@maxulysse"
|
||||
|
|
|
@ -9,15 +9,15 @@ process GATK4_GETPILEUPSUMMARIES {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(input), path(index), path(intervals)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path variants
|
||||
path variants_tbi
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path variants
|
||||
path variants_tbi
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.pileups.table'), emit: table
|
||||
path "versions.yml" , emit: versions
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -25,8 +25,8 @@ process GATK4_GETPILEUPSUMMARIES {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def sitesCommand = intervals ? " -L ${intervals} " : " -L ${variants} "
|
||||
def reference = fasta ? " -R ${fasta}" :""
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
def reference_command = fasta ? "--reference $fasta" : ''
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -36,11 +36,12 @@ process GATK4_GETPILEUPSUMMARIES {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" GetPileupSummaries \\
|
||||
-I $input \\
|
||||
-V $variants \\
|
||||
$sitesCommand \\
|
||||
${reference} \\
|
||||
-O ${prefix}.pileups.table \\
|
||||
--input $input \\
|
||||
--variant $variants \\
|
||||
--output ${prefix}.pileups.table \\
|
||||
$reference_command \\
|
||||
$sites_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -9,15 +9,15 @@ process GATK4_HAPLOTYPECALLER {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(input), path(input_index), path(intervals)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path dbsnp
|
||||
path dbsnp_tbi
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
path dbsnp
|
||||
path dbsnp_tbi
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.vcf.gz"), emit: vcf
|
||||
tuple val(meta), path("*.tbi") , emit: tbi
|
||||
tuple val(meta), path("*.tbi") , optional:true, emit: tbi
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
|
@ -26,25 +26,24 @@ process GATK4_HAPLOTYPECALLER {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def interval_option = intervals ? "-L ${intervals}" : ""
|
||||
def dbsnp_option = dbsnp ? "-D ${dbsnp}" : ""
|
||||
def avail_mem = 3
|
||||
def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
} else {
|
||||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk \\
|
||||
--java-options "-Xmx${avail_mem}g" \\
|
||||
HaplotypeCaller \\
|
||||
-R $fasta \\
|
||||
-I $input \\
|
||||
${dbsnp_option} \\
|
||||
${interval_option} \\
|
||||
-O ${prefix}.vcf.gz \\
|
||||
$args \\
|
||||
--tmp-dir .
|
||||
gatk --java-options "-Xmx${avail_mem}g" HaplotypeCaller \\
|
||||
--input $input \\
|
||||
--output ${prefix}.vcf.gz \\
|
||||
--reference $fasta \\
|
||||
$dbsnp_command \\
|
||||
$interval_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -19,6 +19,7 @@ process GATK4_INDEXFEATUREFILE {
|
|||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -26,10 +27,10 @@ process GATK4_INDEXFEATUREFILE {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" \\
|
||||
IndexFeatureFile \\
|
||||
$args \\
|
||||
-I $feature_file
|
||||
gatk --java-options "-Xmx${avail_mem}g" IndexFeatureFile \\
|
||||
--input $feature_file \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -8,7 +8,7 @@ process GATK4_INTERVALLISTTOBED {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(interval)
|
||||
tuple val(meta), path(intervals)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.bed"), emit: bed
|
||||
|
@ -29,8 +29,9 @@ process GATK4_INTERVALLISTTOBED {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" IntervalListToBed \\
|
||||
--INPUT ${interval} \\
|
||||
--INPUT $intervals \\
|
||||
--OUTPUT ${prefix}.bed \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -8,11 +8,11 @@ process GATK4_INTERVALLISTTOOLS {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(interval_list)
|
||||
tuple val(meta), path(intervals)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*_split/*/*.interval_list"), emit: interval_list
|
||||
path "versions.yml" , emit: versions
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -20,6 +20,7 @@ process GATK4_INTERVALLISTTOOLS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -30,10 +31,10 @@ process GATK4_INTERVALLISTTOOLS {
|
|||
|
||||
mkdir ${prefix}_split
|
||||
|
||||
gatk --java-options "-Xmx${avail_mem}g" \\
|
||||
IntervalListTools \\
|
||||
-I ${interval_list} \\
|
||||
-O ${prefix}_split \\
|
||||
gatk --java-options "-Xmx${avail_mem}g" IntervalListTools \\
|
||||
--INPUT $intervals \\
|
||||
--OUTPUT ${prefix}_split \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
python3 <<CODE
|
||||
|
|
|
@ -20,8 +20,8 @@ process GATK4_LEARNREADORIENTATIONMODEL {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def inputs_list = []
|
||||
f1r2.each() { a -> inputs_list.add(" -I " + a) }
|
||||
def input_list = f1r2.collect{"--input $it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK LearnReadOrientationModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -29,10 +29,10 @@ process GATK4_LEARNREADORIENTATIONMODEL {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" \\
|
||||
LearnReadOrientationModel \\
|
||||
${inputs_list.join(' ')} \\
|
||||
-O ${prefix}.tar.gz \\
|
||||
gatk --java-options "-Xmx${avail_mem}g" LearnReadOrientationModel \\
|
||||
$input_list \\
|
||||
--output ${prefix}.tar.gz \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -8,11 +8,11 @@ process GATK4_MARKDUPLICATES {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(bams)
|
||||
tuple val(meta), path(bam)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.bam") , emit: bam
|
||||
tuple val(meta), path("*.bai") , emit: bai
|
||||
tuple val(meta), path("*.bai") , optional:true, emit: bai
|
||||
tuple val(meta), path("*.metrics"), emit: metrics
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
|
@ -22,7 +22,8 @@ process GATK4_MARKDUPLICATES {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")
|
||||
def input_list = bam.collect{"--INPUT $it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -31,11 +32,10 @@ process GATK4_MARKDUPLICATES {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
|
||||
$bam_list \\
|
||||
$input_list \\
|
||||
--OUTPUT ${prefix}.bam \\
|
||||
--METRICS_FILE ${prefix}.metrics \\
|
||||
--TMP_DIR . \\
|
||||
--CREATE_INDEX true \\
|
||||
--OUTPUT ${prefix}.bam \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -49,3 +49,4 @@ output:
|
|||
authors:
|
||||
- "@ajodeh-juma"
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
||||
|
|
50
modules/gatk4/markduplicatesspark/main.nf
Normal file
50
modules/gatk4/markduplicatesspark/main.nf
Normal file
|
@ -0,0 +1,50 @@
|
|||
process GATK4_MARKDUPLICATES_SPARK {
|
||||
tag "$meta.id"
|
||||
label 'process_high'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
|
||||
'broadinstitute/gatk:4.2.3.0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(bam)
|
||||
path fasta
|
||||
path fasta_fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("${prefix}"), emit: output
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input_list = bam.collect{"--input $it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
} else {
|
||||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
export SPARK_USER=spark3
|
||||
|
||||
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
|
||||
$input_list \\
|
||||
--output $prefix \\
|
||||
--reference $fasta \\
|
||||
--spark-master local[${task.cpus}] \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
60
modules/gatk4/markduplicatesspark/meta.yml
Normal file
60
modules/gatk4/markduplicatesspark/meta.yml
Normal file
|
@ -0,0 +1,60 @@
|
|||
name: gatk4_markduplicates_spark
|
||||
description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA.
|
||||
keywords:
|
||||
- markduplicates
|
||||
- bam
|
||||
- sort
|
||||
tools:
|
||||
- gatk4:
|
||||
description:
|
||||
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
|
||||
with a primary focus on variant discovery and genotyping. Its powerful processing engine
|
||||
and high-performance computing features make it capable of taking on projects of any size.
|
||||
homepage: https://gatk.broadinstitute.org/hc/en-us
|
||||
documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-
|
||||
tool_dev_url: https://github.com/broadinstitute/gatk
|
||||
doi: 10.1158/1538-7445.AM2017-3590
|
||||
licence: ["MIT"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: Sorted BAM file
|
||||
pattern: "*.{bam}"
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fasta_fai:
|
||||
type: file
|
||||
description: Index of reference fasta file
|
||||
pattern: "*.fasta.fai"
|
||||
- dict:
|
||||
type: file
|
||||
description: GATK sequence dictionary
|
||||
pattern: "*.dict"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- bam:
|
||||
type: file
|
||||
description: Marked duplicates BAM file
|
||||
pattern: "*.{bam}"
|
||||
|
||||
authors:
|
||||
- "@ajodeh-juma"
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
|
@ -22,6 +22,7 @@ process GATK4_MERGEBAMALIGNMENT {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK MergeBamAlignment] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -30,10 +31,11 @@ process GATK4_MERGEBAMALIGNMENT {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" MergeBamAlignment \\
|
||||
-ALIGNED $aligned \\
|
||||
-UNMAPPED $unmapped \\
|
||||
-R $fasta \\
|
||||
-O ${prefix}.bam \\
|
||||
--UNMAPPED_BAM $unmapped \\
|
||||
--ALIGNED_BAM $aligned \\
|
||||
--OUTPUT ${prefix}.bam \\
|
||||
--REFERENCE_SEQUENCE $fasta \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -9,6 +9,7 @@ process GATK4_MERGEMUTECTSTATS {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(stats)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.vcf.gz.stats"), emit: stats
|
||||
path "versions.yml" , emit: versions
|
||||
|
@ -19,7 +20,7 @@ process GATK4_MERGEMUTECTSTATS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input = stats.collect{ " -stats ${it} "}.join()
|
||||
def input_list = stats.collect{ "--stats ${it}"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -29,8 +30,9 @@ process GATK4_MERGEMUTECTSTATS {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" MergeMutectStats \\
|
||||
${input} \\
|
||||
-output ${meta.id}.vcf.gz.stats \\
|
||||
$input_list \\
|
||||
--output ${prefix}.vcf.gz.stats \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -8,9 +8,8 @@ process GATK4_MERGEVCFS {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(vcfs)
|
||||
path ref_dict
|
||||
val use_ref_dict
|
||||
tuple val(meta), path(vcf)
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.vcf.gz'), emit: vcf
|
||||
|
@ -22,13 +21,9 @@ process GATK4_MERGEVCFS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input_list = vcf.collect{ "--INPUT $it"}.join(' ')
|
||||
def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : ""
|
||||
|
||||
// Make list of VCFs to merge
|
||||
def input = ""
|
||||
for (vcf in vcfs) {
|
||||
input += " I=${vcf}"
|
||||
}
|
||||
def ref = use_ref_dict ? "D=${ref_dict}" : ""
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -37,9 +32,10 @@ process GATK4_MERGEVCFS {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\
|
||||
$input \\
|
||||
O=${prefix}.vcf.gz \\
|
||||
$ref \\
|
||||
$input_list \\
|
||||
--OUTPUT ${prefix}.vcf.gz \\
|
||||
$reference_command \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -8,10 +8,7 @@ process GATK4_MUTECT2 {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta) , path(input) , path(input_index) , path(intervals), val(which_norm)
|
||||
val run_single
|
||||
val run_pon
|
||||
val run_mito
|
||||
tuple val(meta), path(input), path(input_index), path(intervals)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
@ -33,28 +30,10 @@ process GATK4_MUTECT2 {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def panels_command = ''
|
||||
def normals_command = ''
|
||||
|
||||
def inputs_command = '-I ' + input.join( ' -I ')
|
||||
def interval = intervals ? "-L ${intervals}" : ""
|
||||
|
||||
if(run_pon) {
|
||||
panels_command = ''
|
||||
normals_command = ''
|
||||
|
||||
} else if(run_single) {
|
||||
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
|
||||
normals_command = ''
|
||||
|
||||
} else if(run_mito){
|
||||
panels_command = "-L ${intervals} --mitochondria-mode"
|
||||
normals_command = ''
|
||||
|
||||
} else {
|
||||
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
|
||||
normals_command = '-normal ' + which_norm.join( ' -normal ')
|
||||
}
|
||||
def inputs = input.collect{ "--input $it"}.join(" ")
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : ""
|
||||
def gr_command = germline_resource ? "--germline-resource $germline_resource" : ""
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -64,12 +43,13 @@ process GATK4_MUTECT2 {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" Mutect2 \\
|
||||
-R ${fasta} \\
|
||||
${inputs_command} \\
|
||||
${normals_command} \\
|
||||
${panels_command} \\
|
||||
${interval} \\
|
||||
-O ${prefix}.vcf.gz \\
|
||||
$inputs \\
|
||||
--output ${prefix}.vcf.gz \\
|
||||
--reference $fasta \\
|
||||
$pon_command \\
|
||||
$gr_command \\
|
||||
$interval_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -34,22 +34,6 @@ input:
|
|||
type: File/string
|
||||
description: Specify the region the tool is run on.
|
||||
pattern: ".{bed,interval_list}/chrM"
|
||||
- which_norm:
|
||||
type: list
|
||||
description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode)
|
||||
pattern: "testN"
|
||||
- run_single:
|
||||
type: boolean
|
||||
description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true)
|
||||
pattern: "true/false"
|
||||
- run_pon:
|
||||
type: boolean
|
||||
description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode
|
||||
pattern: "true/false"
|
||||
- run_mito:
|
||||
type: boolean
|
||||
description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode
|
||||
pattern: "true/false"
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
|
|
|
@ -20,6 +20,7 @@ process GATK4_REVERTSAM {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK RevertSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -28,8 +29,9 @@ process GATK4_REVERTSAM {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" RevertSam \\
|
||||
I=$bam \\
|
||||
O=${prefix}.reverted.bam \\
|
||||
--INPUT $bam \\
|
||||
--OUTPUT ${prefix}.reverted.bam \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -20,7 +20,8 @@ process GATK4_SAMTOFASTQ {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def output = meta.single_end ? "FASTQ=${prefix}.fastq.gz" : "FASTQ=${prefix}_1.fastq.gz SECOND_END_FASTQ=${prefix}_2.fastq.gz"
|
||||
def output = meta.single_end ? "--FASTQ ${prefix}.fastq.gz" : "--FASTQ ${prefix}_1.fastq.gz --SECOND_END_FASTQ ${prefix}_2.fastq.gz"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK SamToFastq] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -29,8 +30,9 @@ process GATK4_SAMTOFASTQ {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" SamToFastq \\
|
||||
I=$bam \\
|
||||
--INPUT $bam \\
|
||||
$output \\
|
||||
--TMP_DIR . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -21,6 +21,7 @@ process GATK4_SELECTVARIANTS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -29,8 +30,9 @@ process GATK4_SELECTVARIANTS {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}G" SelectVariants \\
|
||||
-V $vcf \\
|
||||
-O ${prefix}.selectvariants.vcf.gz \\
|
||||
--variant $vcf \\
|
||||
--output ${prefix}.selectvariants.vcf.gz \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -8,7 +8,7 @@ process GATK4_SPLITNCIGARREADS {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(bam)
|
||||
tuple val(meta), path(bam), path(bai), path(intervals)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
@ -23,6 +23,8 @@ process GATK4_SPLITNCIGARREADS {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -31,9 +33,11 @@ process GATK4_SPLITNCIGARREADS {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" SplitNCigarReads \\
|
||||
-R $fasta \\
|
||||
-I $bam \\
|
||||
-O ${prefix}.bam \\
|
||||
--input $bam \\
|
||||
--output ${prefix}.bam \\
|
||||
--reference $fasta \\
|
||||
$interval_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -23,6 +23,13 @@ input:
|
|||
type: list
|
||||
description: BAM/SAM/CRAM file containing reads
|
||||
pattern: "*.{bam,sam,cram}"
|
||||
- bai:
|
||||
type: list
|
||||
description: BAI/SAI/CRAI index file (optional)
|
||||
pattern: "*.{bai,sai,crai}"
|
||||
- intervals:
|
||||
type: file
|
||||
description: Bed file with the genomic regions included in the library (optional)
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
|
|
|
@ -8,7 +8,7 @@ process GATK4_VARIANTFILTRATION {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(vcf), path(vcf_tbi)
|
||||
tuple val(meta), path(vcf), path(tbi)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
@ -24,6 +24,7 @@ process GATK4_VARIANTFILTRATION {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -32,9 +33,10 @@ process GATK4_VARIANTFILTRATION {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\
|
||||
-R $fasta \\
|
||||
-V $vcf \\
|
||||
-O ${prefix}.vcf.gz \\
|
||||
--variant $vcf \\
|
||||
--output ${prefix}.vcf.gz \\
|
||||
--reference $fasta \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -8,11 +8,11 @@ process GATK4_VARIANTRECALIBRATOR {
|
|||
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(vcf) , path(tbi)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
tuple path(resvcfs), path(restbis), val(reslabels)
|
||||
tuple val(meta), path(vcf), path(tbi)
|
||||
tuple path(vcfs), path(tbis), val(labels)
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.recal") , emit: recal
|
||||
|
@ -27,8 +27,8 @@ process GATK4_VARIANTRECALIBRATOR {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
refCommand = fasta ? "-R ${fasta} " : ''
|
||||
resourceCommand = '--resource:' + reslabels.join( ' --resource:')
|
||||
def reference_command = fasta ? "--reference $fasta " : ''
|
||||
def resource_command = labels.collect{"--resource:$it"}.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -38,11 +38,12 @@ process GATK4_VARIANTRECALIBRATOR {
|
|||
}
|
||||
"""
|
||||
gatk --java-options "-Xmx${avail_mem}g" VariantRecalibrator \\
|
||||
${refCommand} \\
|
||||
-V ${vcf} \\
|
||||
-O ${prefix}.recal \\
|
||||
--variant $vcf \\
|
||||
--output ${prefix}.recal \\
|
||||
--tranches-file ${prefix}.tranches \\
|
||||
${resourceCommand} \\
|
||||
$reference_command \\
|
||||
$resource_command \\
|
||||
--tmp-dir . \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -4,8 +4,8 @@ process GUNZIP {
|
|||
|
||||
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
|
||||
'biocontainers/biocontainers:v1.2.0_cv1' }"
|
||||
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
|
||||
'ubuntu:20.04' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(archive)
|
||||
|
|
|
@ -9,11 +9,11 @@ process KAIJU_KAIJU {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(reads)
|
||||
tuple path(db), path(dbnodes)
|
||||
path(db)
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.tsv'), emit: results
|
||||
path "versions.yml" , emit: versions
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -23,11 +23,13 @@ process KAIJU_KAIJU {
|
|||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
|
||||
"""
|
||||
dbnodes=`find -L ${db} -name "*nodes.dmp"`
|
||||
dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
|
||||
kaiju \\
|
||||
$args \\
|
||||
-z $task.cpus \\
|
||||
-t ${dbnodes} \\
|
||||
-f ${db} \\
|
||||
-t \$dbnodes \\
|
||||
-f \$dbname \\
|
||||
-o ${prefix}.tsv \\
|
||||
$input
|
||||
|
||||
|
|
|
@ -50,3 +50,4 @@ output:
|
|||
authors:
|
||||
- "@talnor"
|
||||
- "@sofstam"
|
||||
- "@jfy133"
|
||||
|
|
40
modules/kaiju/kaiju2table/main.nf
Normal file
40
modules/kaiju/kaiju2table/main.nf
Normal file
|
@ -0,0 +1,40 @@
|
|||
process KAIJU_KAIJU2TABLE {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
|
||||
'quay.io/biocontainers/kaiju:1.8.2--h2e03b76_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(results)
|
||||
path db
|
||||
val taxon_rank
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.txt'), emit: summary
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
"""
|
||||
dbnodes=`find -L ${db} -name "*nodes.dmp"`
|
||||
dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
|
||||
kaiju2table $args \\
|
||||
-t \$dbnodes \\
|
||||
-n \$dbname \\
|
||||
-r ${taxon_rank} \\
|
||||
-o ${prefix}.txt \\
|
||||
${results}
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
50
modules/kaiju/kaiju2table/meta.yml
Normal file
50
modules/kaiju/kaiju2table/meta.yml
Normal file
|
@ -0,0 +1,50 @@
|
|||
name: "kaiju_kaiju2table"
|
||||
description: Summarise Kaiju classification results into a table for a given taxonomic rank
|
||||
keywords:
|
||||
- classify
|
||||
- metagenomics
|
||||
tools:
|
||||
- kaiju:
|
||||
description: Fast and sensitive taxonomic classification for metagenomics
|
||||
homepage: https://kaiju.binf.ku.dk/
|
||||
documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
|
||||
tool_dev_url: https://github.com/bioinformatics-centre/kaiju
|
||||
doi: "10.1038/ncomms11257"
|
||||
licence: ["GNU GPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- results:
|
||||
type: file
|
||||
description: File containing the kaiju classification results
|
||||
pattern: "*.{tsv}"
|
||||
- taxon_rank:
|
||||
type: string
|
||||
description: |
|
||||
Taxonomic rank to display in report
|
||||
pattern: "phylum|class|order|family|genus|species"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- summary:
|
||||
type: file
|
||||
description: |
|
||||
Summary table for a given taxonomic rank
|
||||
pattern: "*.{txt}"
|
||||
|
||||
authors:
|
||||
- "@sofstam"
|
||||
- "@talnor"
|
||||
- "@jfy133"
|
|
@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
|
|||
input:
|
||||
tuple val(meta), path(reads)
|
||||
path db
|
||||
val save_output_fastqs
|
||||
val save_reads_assignment
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*classified*') , emit: classified
|
||||
tuple val(meta), path('*unclassified*'), emit: unclassified
|
||||
tuple val(meta), path('*report.txt') , emit: txt
|
||||
path "versions.yml" , emit: versions
|
||||
tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq
|
||||
tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq
|
||||
tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
|
||||
tuple val(meta), path('*report.txt') , emit: report
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
|
|||
def paired = meta.single_end ? "" : "--paired"
|
||||
def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
|
||||
def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
|
||||
def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
|
||||
def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
|
||||
def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
|
||||
def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
|
||||
|
||||
"""
|
||||
kraken2 \\
|
||||
--db $db \\
|
||||
--threads $task.cpus \\
|
||||
--unclassified-out $unclassified \\
|
||||
--classified-out $classified \\
|
||||
--report ${prefix}.kraken2.report.txt \\
|
||||
--gzip-compressed \\
|
||||
$unclassified_command \\
|
||||
$classified_command \\
|
||||
$readclassification_command \\
|
||||
$paired \\
|
||||
$args \\
|
||||
$reads
|
||||
|
||||
pigz -p $task.cpus *.fastq
|
||||
$compress_reads_command
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
|
|
|
@ -27,25 +27,40 @@ input:
|
|||
- db:
|
||||
type: directory
|
||||
description: Kraken2 database
|
||||
- save_output_fastqs:
|
||||
type: boolean
|
||||
description: |
|
||||
If true, optional commands are added to save classified and unclassified reads
|
||||
as fastq files
|
||||
- save_reads_assignment:
|
||||
type: boolean
|
||||
description: |
|
||||
If true, an optional command is added to save a file reporting the taxonomic
|
||||
classification of each input read
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- classified:
|
||||
- classified_reads_fastq:
|
||||
type: file
|
||||
description: |
|
||||
Reads classified to belong to any of the taxa
|
||||
Reads classified as belonging to any of the taxa
|
||||
on the Kraken2 database.
|
||||
pattern: "*{fastq.gz}"
|
||||
- unclassified:
|
||||
- unclassified_reads_fastq:
|
||||
type: file
|
||||
description: |
|
||||
Reads not classified to belong to any of the taxa
|
||||
Reads not classified to any of the taxa
|
||||
on the Kraken2 database.
|
||||
pattern: "*{fastq.gz}"
|
||||
- txt:
|
||||
- classified_reads_assignment:
|
||||
type: file
|
||||
description: |
|
||||
Kraken2 output file indicating the taxonomic assignment of
|
||||
each input read
|
||||
- report:
|
||||
type: file
|
||||
description: |
|
||||
Kraken2 report containing stats about classified
|
||||
|
|
34
modules/krona/ktimporttext/main.nf
Normal file
34
modules/krona/ktimporttext/main.nf
Normal file
|
@ -0,0 +1,34 @@
|
|||
process KRONA_KTIMPORTTEXT {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
|
||||
'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(report)
|
||||
|
||||
output:
|
||||
tuple val(meta), path ('*.html'), emit: html
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
"""
|
||||
ktImportText \\
|
||||
$args \\
|
||||
-o ${prefix}.html \\
|
||||
$report
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue