1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-21 22:06:04 +00:00

Merge branch 'dev' into nf-core-template-merge-2.5

This commit is contained in:
James A. Fellows Yates 2022-08-31 09:16:34 +02:00 committed by GitHub
commit 28d8d3f8fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
103 changed files with 14545 additions and 348 deletions

View file

@ -22,6 +22,21 @@ jobs:
NXF_VER:
- "21.10.3"
- "latest-everything"
parameters:
- "--perform_longread_qc false"
- "--perform_shortread_qc false"
- "--shortread_qc_tool fastp"
- "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
- "--shortread_qc_tool fastp --shortread_qc_mergepairs"
- "--shortread_qc_tool adapterremoval"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
- "--shortread_complexityfilter_tool bbduk"
- "--shortread_complexityfilter_tool prinseqplusplus"
- "--perform_runmerging"
- "--perform_runmerging --shortread_qc_mergepairs"
- "--shortread_complexityfilter false --perform_shortread_hostremoval"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
@ -31,9 +46,68 @@ jobs:
with:
version: "${{ matrix.NXF_VER }}"
- name: Show current locale
run: locale
- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
motus:
name: Test mOTUs with workflow parameters
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
runs-on: ubuntu-latest
env:
NXF_VER: ${{ matrix.nxf_ver }}
NXF_ANSI_LOG: false
strategy:
matrix:
# Nextflow versions
include:
# Test pipeline minimum Nextflow version
- NXF_VER: "21.10.3"
NXF_EDGE: ""
# Test latest edge release of Nextflow
- NXF_VER: ""
NXF_EDGE: "1"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
- name: Install Nextflow
env:
NXF_VER: ${{ matrix.NXF_VER }}
# Uncomment only if the edge release is more recent than the latest stable release
# See https://github.com/nextflow-io/nextflow/issues/2467
# NXF_EDGE: ${{ matrix.NXF_EDGE }}
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
- name: Show current locale
run: locale
- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Prepare the database
run: |
wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
python downloadDB.py > download_db_log.txt
echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv'
- name: Run pipeline with test data
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv

View file

@ -6,4 +6,5 @@ results/
.DS_Store
testing/
testing*
tests/
*.pyc

View file

@ -13,8 +13,55 @@
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
- [Porechop](https://github.com/rrwick/Porechop)
- [BBTools](http://sourceforge.net/projects/bbmap/)
- [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
> Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [Krona](https://doi.org/10.1186/1471-2105-12-385)
> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957)
> Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)
> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
- [DIAMOND](https://doi.org/10.1038/nmeth.3176)
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
- [FILTLONG](https://github.com/rrwick/Filtlong)
## Software packaging/containerisation tools
- [Anaconda](https://anaconda.com)

View file

@ -1,4 +1,4 @@
# ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_light.png#gh-light-mode-only) ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_dark.png#gh-dark-mode-only)
# ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_custom_light.png#gh-light-mode-only) ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_custom_dark.png#gh-dark-mode-only)
[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/taxprofiler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
@ -14,7 +14,7 @@
<!-- TODO nf-core: Write a 1-2 sentence summary of what data the pipeline is for and what it does -->
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for Taxonomic profiling of shotgun metagenomic data.
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic profiling of shotgun metagenomic data. It allows for in-parallel profiling with multiple profiling tools against multiple databases, produces standardised output tables.
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
@ -27,7 +27,24 @@ On release, automated continuous integration tests run the pipeline on a full-si
<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
2. Performs optional read pre-processing
- Adapter clipping and merging (short read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity filtering ([bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus))
- Host read removal ([BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/))
- Run merging
3. Performs taxonomic profiling using one or more of:
- [Kraken2](https://ccb.jhu.edu/software/kraken2/)
- [MetaPhlAn3](https://huttenhower.sph.harvard.edu/metaphlan/)
- [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)
- [DIAMOND](https://github.com/bbuchfink/diamond)
- [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
- [Kaiju](https://kaiju.binf.ku.dk/)
- [mOTUs](https://motu-tool.org/)
- [MetaMaps](https://github.com/DiltheyLab/MetaMaps)
4. Perform optional post-processing with:
- [bracken](https://ccb.jhu.edu/software/bracken/)
5. Standardises output tables
6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
## Quick Start
@ -50,10 +67,8 @@ On release, automated continuous integration tests run the pipeline on a full-si
4. Start running your own analysis!
<!-- TODO nf-core: Update the example "typical command" below used to run the pipeline -->
```bash
nextflow run nf-core/taxprofiler --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
```console
nextflow run nf-core/taxprofiler --input samplesheet.csv --databases database.csv --outdir <OUTDIR> --run_<TOOL1> --run_<TOOL1> -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
```
## Documentation
@ -66,7 +81,7 @@ nf-core/taxprofiler was originally written by nf-core community.
We thank the following people for their extensive assistance in the development of this pipeline:
<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
[James A. Fellows Yates](https://github.com/jfy133), [Moritz Beber](https://github.com/Midnighter), [Lauri Mesilaakso](https://github.com/ljmesi), [Sofia Stamouli](https://github.com/sofsam), [Maxime Borry](https://github.com/maxibor).
## Contributions and Support

View file

@ -9,3 +9,46 @@ report_section_order:
order: -1001
export_plots: true
custom_logo: "nf-core-taxprofiler_logo_custom_light.png"
custom_logo_url: https://nf-co.re/taxprofiler
custom_logo_title: "nf-core/taxprofiler"
run_modules:
- fastqc
- adapterRemoval
- fastp
- bowtie2
- kraken
- malt
- custom_content
#extra_fn_clean_exts:
# - '_fastp'
# - '.pe.settings'
# - '.se.settings'
top_modules:
- "fastqc":
name: "FastQC (pre-Trimming)"
path_filters:
- "*raw_*fastqc.zip"
- "fastp"
- "adapterRemoval"
- "fastqc":
name: "FastQC (post-Trimming)"
path_filters:
- "*raw_*processed.zip"
- "kraken":
name: "Kraken"
path_filters:
- "*.kraken2.report.txt"
- "kraken":
name: "Centrifuge"
anchor: "centrifuge"
target: "Centrifuge"
doi: "10.1101/gr.210641.116"
info: "is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples. The system uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index. Note: Figure title"
extra: "Note: plot title will say Kraken2 due to Centrifuge producing the same output format as Kraken. If activated, see the actual Kraken2 results in the section above."
path_filters:
- "*.centrifuge.txt"

View file

@ -1,3 +1,6 @@
sample,fastq_1,fastq_2
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2611,ERR5766174,ILLUMINA,,,/<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
2612,ERR5766176,ILLUMINA,/<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz,/<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz,
2612,ERR5766180,ILLUMINA,/<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
2613,ERR5766181,ILLUMINA,/<path>/<to>/fastq/ERX5474937_ERR5766181_1.fastq.gz,/<path>/<to>/fastq/ERX5474937_ERR5766181_2.fastq.gz,
ERR3201952,ERR3201952,OXFORD_NANOPORE,/<path>/<to>/fastq/ERR3201952.fastq.gz,,

1 sample run_accession instrument_platform fastq_1 fastq_2 fasta
2 SAMPLE_PAIRED_END 2611 ERR5766174 ILLUMINA /path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz /path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz /<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
3 SAMPLE_SINGLE_END 2612 ERR5766176 ILLUMINA /path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz /<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz /<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz
4 2612 ERR5766180 ILLUMINA /<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz
5 2613 ERR5766181 ILLUMINA /<path>/<to>/fastq/ERX5474937_ERR5766181_1.fastq.gz /<path>/<to>/fastq/ERX5474937_ERR5766181_2.fastq.gz
6 ERR3201952 ERR3201952 OXFORD_NANOPORE /<path>/<to>/fastq/ERR3201952.fastq.gz

View file

@ -1,258 +1,238 @@
#!/usr/bin/env python
"""Provide a command line tool to validate and transform tabular samplesheets."""
import argparse
import csv
import logging
from distutils import extension
import os
import sys
from collections import Counter
from pathlib import Path
import errno
import argparse
logger = logging.getLogger()
class RowChecker:
"""
Define a service that can validate and transform each given row.
Attributes:
modified (list): A list of dicts, where each dict corresponds to a previously
validated and transformed row. The order of rows is maintained.
"""
VALID_FORMATS = (
".fq.gz",
".fastq.gz",
def parse_args(args=None):
Description = (
"Reformat nf-core/taxprofiler samplesheet file and check its contents."
)
Epilog = "Example usage: python check_samplesheet.py <FILE_IN> <FILE_OUT>"
def __init__(
self,
sample_col="sample",
first_col="fastq_1",
second_col="fastq_2",
single_col="single_end",
**kwargs,
):
"""
Initialize the row checker with the expected column names.
parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument("FILE_IN", help="Input samplesheet file.")
parser.add_argument("FILE_OUT", help="Output file.")
return parser.parse_args(args)
Args:
sample_col (str): The name of the column that contains the sample name
(default "sample").
first_col (str): The name of the column that contains the first (or only)
FASTQ file path (default "fastq_1").
second_col (str): The name of the column that contains the second (if any)
FASTQ file path (default "fastq_2").
single_col (str): The name of the new column that will be inserted and
records whether the sample contains single- or paired-end sequencing
reads (default "single_end").
"""
super().__init__(**kwargs)
self._sample_col = sample_col
self._first_col = first_col
self._second_col = second_col
self._single_col = single_col
self._seen = set()
self.modified = []
def validate_and_transform(self, row):
"""
Perform all validations on the given row and insert the read pairing status.
Args:
row (dict): A mapping from column headers (keys) to elements of that row
(values).
"""
self._validate_sample(row)
self._validate_first(row)
self._validate_second(row)
self._validate_pair(row)
self._seen.add((row[self._sample_col], row[self._first_col]))
self.modified.append(row)
def _validate_sample(self, row):
"""Assert that the sample name exists and convert spaces to underscores."""
if len(row[self._sample_col]) <= 0:
raise AssertionError("Sample input is required.")
# Sanitize samples slightly.
row[self._sample_col] = row[self._sample_col].replace(" ", "_")
def _validate_first(self, row):
"""Assert that the first FASTQ entry is non-empty and has the right format."""
if len(row[self._first_col]) <= 0:
raise AssertionError("At least the first FASTQ file is required.")
self._validate_fastq_format(row[self._first_col])
def _validate_second(self, row):
"""Assert that the second FASTQ entry has the right format if it exists."""
if len(row[self._second_col]) > 0:
self._validate_fastq_format(row[self._second_col])
def _validate_pair(self, row):
"""Assert that read pairs have the same file extension. Report pair status."""
if row[self._first_col] and row[self._second_col]:
row[self._single_col] = False
if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]:
raise AssertionError("FASTQ pairs must have the same file extensions.")
else:
row[self._single_col] = True
def _validate_fastq_format(self, filename):
"""Assert that a given filename has one of the expected FASTQ extensions."""
if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):
raise AssertionError(
f"The FASTQ file has an unrecognized extension: {filename}\n"
f"It should be one of: {', '.join(self.VALID_FORMATS)}"
)
def validate_unique_samples(self):
"""
Assert that the combination of sample name and FASTQ filename is unique.
In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the
number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment.
"""
if len(self._seen) != len(self.modified):
raise AssertionError("The pair of sample name and FASTQ must be unique.")
seen = Counter()
for row in self.modified:
sample = row[self._sample_col]
seen[sample] += 1
row[self._sample_col] = f"{sample}_T{seen[sample]}"
def read_head(handle, num_lines=10):
"""Read the specified number of lines from the current position in the file."""
lines = []
for idx, line in enumerate(handle):
if idx == num_lines:
break
lines.append(line)
return "".join(lines)
def sniff_format(handle):
"""
Detect the tabular format.
Args:
handle (text file): A handle to a `text file`_ object. The read position is
expected to be at the beginning (index 0).
Returns:
csv.Dialect: The detected tabular format.
.. _text file:
https://docs.python.org/3/glossary.html#term-text-file
"""
peek = read_head(handle)
handle.seek(0)
sniffer = csv.Sniffer()
if not sniffer.has_header(peek):
logger.critical(f"The given sample sheet does not appear to contain a header.")
sys.exit(1)
dialect = sniffer.sniff(peek)
return dialect
def make_dir(path):
if len(path) > 0:
try:
os.makedirs(path)
except OSError as exception:
if exception.errno != errno.EEXIST:
raise exception
def print_error(error, context="Line", context_str=""):
error_str = "ERROR: Please check samplesheet -> {}".format(error)
if context != "" and context_str != "":
error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
error, context.strip(), context_str.strip()
)
print(error_str)
sys.exit(1)
def check_samplesheet(file_in, file_out):
"""
Check that the tabular samplesheet has the structure expected by nf-core pipelines.
Validate the general shape of the table, expected columns, and each row. Also add
an additional column which records whether one or two FASTQ reads were found.
Args:
file_in (pathlib.Path): The given tabular samplesheet. The format can be either
CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``.
file_out (pathlib.Path): Where the validated and transformed samplesheet should
be created; always in CSV format.
Example:
This function checks that the samplesheet follows the following structure,
see also the `viral recon samplesheet`_::
sample,fastq_1,fastq_2
SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz
SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
.. _viral recon samplesheet:
https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
This function checks that the samplesheet follows the following structure:
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2611,ERR5766174,ILLUMINA,,,ERX5474930_ERR5766174_1.fa.gz
2612,ERR5766176,ILLUMINA,ERX5474932_ERR5766176_1.fastq.gz,ERX5474932_ERR5766176_2.fastq.gz,
2612,ERR5766174,ILLUMINA,ERX5474936_ERR5766180_1.fastq.gz,,
2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,
"""
required_columns = {"sample", "fastq_1", "fastq_2"}
# See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
with file_in.open(newline="") as in_handle:
reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
# Validate the existence of the expected header columns.
if not required_columns.issubset(reader.fieldnames):
logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.")
FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
FA_EXTENSIONS = (
".fa",
".fa.gz",
".fasta",
".fasta.gz",
".fna",
".fna.gz",
".fas",
".fas.gz",
)
INSTRUMENT_PLATFORMS = [
"ABI_SOLID",
"BGISEQ",
"CAPILLARY",
"COMPLETE_GENOMICS",
"DNBSEQ",
"HELICOS",
"ILLUMINA",
"ION_TORRENT",
"LS454",
"OXFORD_NANOPORE",
"PACBIO_SMRT",
]
sample_mapping_dict = {}
with open(file_in, "r") as fin:
## Check header
MIN_COLS = 4
HEADER = [
"sample",
"run_accession",
"instrument_platform",
"fastq_1",
"fastq_2",
"fasta",
]
header = [x.strip('"') for x in fin.readline().strip().split(",")]
## Check for missing mandatory columns
missing_columns = list(set(HEADER) - set(header))
if len(missing_columns) > 0:
print(
"ERROR: Missing required column header -> {}. Note some columns can otherwise be empty. See pipeline documentation (https://nf-co.re/taxprofiler/usage).".format(
",".join(missing_columns)
)
)
sys.exit(1)
# Validate each row.
checker = RowChecker()
for i, row in enumerate(reader):
try:
checker.validate_and_transform(row)
except AssertionError as error:
logger.critical(f"{str(error)} On line {i + 2}.")
sys.exit(1)
checker.validate_unique_samples()
header = list(reader.fieldnames)
header.insert(1, "single_end")
# See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
with file_out.open(mode="w", newline="") as out_handle:
writer = csv.DictWriter(out_handle, header, delimiter=",")
writer.writeheader()
for row in checker.modified:
writer.writerow(row)
## Find locations of mandatory columns
header_locs = {}
for i in HEADER:
header_locs[i] = header.index(i)
## Check sample entries
for line in fin:
## Pull out only relevant columns for downstream checking
line_parsed = [x.strip().strip('"') for x in line.strip().split(",")]
lspl = [line_parsed[i] for i in header_locs.values()]
# Check valid number of columns per row
if len(lspl) < len(HEADER):
print_error(
"Invalid number of columns (minimum = {})!".format(len(HEADER)),
"Line",
line,
)
num_cols = len([x for x in lspl if x])
if num_cols < MIN_COLS:
print_error(
"Invalid number of populated columns (minimum = {})!".format(
MIN_COLS
),
"Line",
line,
)
## Check sample name entries
(
sample,
run_accession,
instrument_platform,
fastq_1,
fastq_2,
fasta,
) = lspl[: len(HEADER)]
sample = sample.replace(" ", "_")
if not sample:
print_error("Sample entry has not been specified!", "Line", line)
## Check FastQ file extension
for fastq in [fastq_1, fastq_2]:
if fastq:
if fastq.find(" ") != -1:
print_error("FastQ file contains spaces!", "Line", line)
if not fastq.endswith(FQ_EXTENSIONS):
print_error(
f"FastQ file does not have extension {' or '.join(list(FQ_EXTENSIONS))} !",
"Line",
line,
)
if fasta:
if fasta.find(" ") != -1:
print_error("FastA file contains spaces!", "Line", line)
if not fasta.endswith(FA_EXTENSIONS):
print_error(
f"FastA file does not have extension {' or '.join(list(FA_EXTENSIONS))}!",
"Line",
line,
)
sample_info = []
# Check run_accession
if not run_accession:
print_error("Run accession has not been specified!", "Line", line)
else:
sample_info.append(run_accession)
# Check instrument_platform
if not instrument_platform:
print_error("Instrument platform has not been specified!", "Line", line)
else:
if instrument_platform not in INSTRUMENT_PLATFORMS:
print_error(
f"Instrument platform {instrument_platform} is not supported!",
f"List of supported platforms {', '.join(INSTRUMENT_PLATFORMS)}",
"Line",
line,
)
sample_info.append(instrument_platform)
## Auto-detect paired-end/single-end
if sample and fastq_1 and fastq_2: ## Paired-end short reads
sample_info.extend(["0", fastq_1, fastq_2, fasta])
elif (
sample and fastq_1 and not fastq_2
): ## Single-end short/long fastq reads
sample_info.extend(["1", fastq_1, fastq_2, fasta])
elif (
sample and fasta and not fastq_1 and not fastq_2
): ## Single-end long reads
sample_info.extend(["1", fastq_1, fastq_2, fasta])
elif fasta and (fastq_1 or fastq_2):
print_error(
"FastQ and FastA files cannot be specified together in the same library!",
"Line",
line,
)
else:
print_error("Invalid combination of columns provided!", "Line", line)
## Create sample mapping dictionary = { sample: [ run_accession, instrument_platform, single_end, fastq_1, fastq_2 , fasta ] }
if sample not in sample_mapping_dict:
sample_mapping_dict[sample] = [sample_info]
else:
if sample_info in sample_mapping_dict[sample]:
print_error("Samplesheet contains duplicate rows!", "Line", line)
else:
sample_mapping_dict[sample].append(sample_info)
## Write validated samplesheet with appropriate columns
HEADER_OUT = [
"sample",
"run_accession",
"instrument_platform",
"single_end",
"fastq_1",
"fastq_2",
"fasta",
]
if len(sample_mapping_dict) > 0:
out_dir = os.path.dirname(file_out)
make_dir(out_dir)
with open(file_out, "w") as fout:
fout.write(",".join(HEADER_OUT) + "\n")
for sample in sorted(sample_mapping_dict.keys()):
for idx, val in enumerate(sample_mapping_dict[sample]):
fout.write(f"{sample},{','.join(val)}\n")
else:
print_error("No entries to process!", "Samplesheet: {}".format(file_in))
def parse_args(argv=None):
"""Define and immediately parse command line arguments."""
parser = argparse.ArgumentParser(
description="Validate and transform a tabular samplesheet.",
epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv",
)
parser.add_argument(
"file_in",
metavar="FILE_IN",
type=Path,
help="Tabular input samplesheet in CSV or TSV format.",
)
parser.add_argument(
"file_out",
metavar="FILE_OUT",
type=Path,
help="Transformed output samplesheet in CSV format.",
)
parser.add_argument(
"-l",
"--log-level",
help="The desired log level (default WARNING).",
choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"),
default="WARNING",
)
return parser.parse_args(argv)
def main(argv=None):
"""Coordinate argument parsing and program execution."""
args = parse_args(argv)
logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")
if not args.file_in.is_file():
logger.error(f"The given input file {args.file_in} was not found!")
sys.exit(2)
args.file_out.parent.mkdir(parents=True, exist_ok=True)
check_samplesheet(args.file_in, args.file_out)
def main(args=None):
args = parse_args(args)
check_samplesheet(args.FILE_IN, args.FILE_OUT)
if __name__ == "__main__":

View file

@ -12,12 +12,6 @@
process {
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
withName: SAMPLESHEET_CHECK {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
@ -26,8 +20,391 @@ process {
]
}
withName: DATABASE_CHECK {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: FASTQC {
ext.args = '--quiet'
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
publishDir = [
path: { "${params.outdir}/fastqc/raw" },
mode: params.publish_dir_mode,
pattern: '*.html'
]
}
withName: FASTQC_PROCESSED {
ext.args = '--quiet'
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
publishDir = [
path: { "${params.outdir}/fastqc/processed" },
mode: params.publish_dir_mode,
pattern: '*.html'
]
}
withName: FASTP_SINGLE {
ext.args = [
// trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
// filtering options
"--length_required ${params.shortread_qc_minlength}",
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/fastp" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
]
}
withName: FASTP_PAIRED {
ext.args = [
// collapsing options - option to retain singletons
params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
// trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
// filtering options
"--length_required ${params.shortread_qc_minlength}",
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/fastp" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
]
}
withName: ADAPTERREMOVAL_SINGLE {
ext.args = [
// trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
// filtering options
"--minlength ${params.shortread_qc_minlength}"
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/adapterremoval" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
]
}
withName: ADAPTERREMOVAL_PAIRED {
ext.args = [
// collapsing options
params.shortread_qc_mergepairs ? "--collapse" : "",
// trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
// filtering options
"--minlength ${params.shortread_qc_minlength}"
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/adapterremoval" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
]
}
withName: PORECHOP {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/porechop" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
]
}
withName: FILTLONG {
ext.args = [
"--min_length ${params.longread_qc_minlength}",
"--keep_percent ${params.longread_qc_keep_percent}",
"--target_bases ${params.longread_qc_target_bases}"
]
.join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" }
publishDir = [
path: { "${params.outdir}/filtlong" },
mode: params.publish_dir_mode,
pattern: '*.{fastq.gz,log}',
enabled: params.save_preprocessed_reads
]
}
withName: BOWTIE2_BUILD {
publishDir = [
path: { "${params.outdir}/bowtie2/build" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_index,
pattern: 'bowtie2'
]
}
withName: BOWTIE2_ALIGN {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
[
path: { "${params.outdir}/bowtie2/align" },
mode: params.publish_dir_mode,
pattern: '*.log'
],
[
path: { "${params.outdir}/bowtie2/align" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_mapped,
pattern: '*.bam'
],
[
path: { "${params.outdir}/bowtie2/align" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_unmapped,
pattern: '*.fastq.gz'
]
]
}
withName: MINIMAP2_INDEX {
ext.args = '-x map-ont'
publishDir = [
path: { "${params.outdir}/minimap2/index" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_index,
pattern: 'minimap2'
]
}
withName: MINIMAP2_ALIGN {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/minimap2/align" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_mapped,
pattern: '*.bam'
]
}
withName: SAMTOOLS_VIEW {
ext.args = '-f 4'
ext.prefix = { "${meta.id}.mapped.sorted" }
publishDir = [
path: { "${params.outdir}/samtools/view" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_unmapped,
pattern: '*.bam'
]
}
withName: SAMTOOLS_BAM2FQ {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/samtools/bam2fq" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_unmapped,
pattern: '*.fq.gz'
]
}
withName: BBMAP_BBDUK {
ext.args = [
"entropy=${params.shortread_complexityfilter_entropy}",
"entropywindow=${params.shortread_complexityfilter_bbduk_windowsize}",
params.shortread_complexityfilter_bbduk_mask ? "entropymask=t" : "entropymask=f"
].join(' ').trim()
ext.prefix = { "${meta.id}-${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/bbduk/" },
mode: params.publish_dir_mode,
pattern: '*.{fastq.gz,log}',
enabled: params.save_complexityfiltered_reads
]
}
withName: PRINSEQPLUSPLUS {
ext.args = [
params.shortread_complexityfilter_prinseqplusplus_mode == 'dust' ? "-lc_dust=${params.shortread_complexityfilter_prinseqplusplus_dustscore}" : "-lc_entropy=${params.shortread_complexityfilter_entropy}",
"-trim_qual_left=0 -trim_qual_left=0 -trim_qual_window=0 -trim_qual_step=0",
"-VERBOSE 2"
].join(' ').trim()
ext.prefix = { "${meta.id}-${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/prinseqplusplus/" },
mode: params.publish_dir_mode,
pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz,log}',
enabled: params.save_complexityfiltered_reads
]
}
withName: CAT_FASTQ {
ext.prefix = { "${meta.id}" }
publishDir = [
path: { "${params.outdir}/run_merging/" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_runmerged_reads
]
}
withName: MALT_RUN {
ext.args = { "${meta.db_params}" }
// one run with multiple samples, so fix ID to just db name to ensure clean log name
ext.prefix = { "${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{rma6,log,sam}'
]
}
withName: 'NFCORE_TAXPROFILER:TAXPROFILER:PROFILING:MEGAN_RMA2INFO' {
ext.args = "-c2c Taxonomy"
ext.prefix = { "${meta.id}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt.gz,megan}'
]
}
withName: KRAKEN2_KRAKEN2 {
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/kraken2/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt,report,fastq.gz}'
]
}
withName: KRONA_CLEANUP {
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/krona" },
mode: params.publish_dir_mode,
pattern: '*.{html}'
]
}
withName: KRONA_KTIMPORTTEXT {
ext.prefix = { "${meta.tool}-${meta.id}" }
publishDir = [
path: { "${params.outdir}/krona" },
mode: params.publish_dir_mode,
pattern: '*.{html}'
]
}
withName: 'NFCORE_TAXPROFILER:TAXPROFILER:VISUALIZATION_KRONA:MEGAN_RMA2INFO' {
ext.args = { "--read2class Taxonomy" }
ext.prefix = { "${meta.id}-${meta.db_name}" }
}
withName: KRONA_KTIMPORTTAXONOMY {
ext.args = "-i"
ext.prefix = { "${meta.tool}-${meta.id}" }
publishDir = [
path: { "${params.outdir}/krona" },
mode: params.publish_dir_mode,
pattern: '*.{html}'
]
}
withName: METAPHLAN3 {
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/metaphlan3/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{biom,txt}'
]
}
withName: CENTRIFUGE_CENTRIFUGE {
publishDir = [
path: { "${params.outdir}/centrifuge/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt,sam,gz}'
]
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.centrifuge" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.centrifuge" }
}
withName: CENTRIFUGE_KREPORT {
errorStrategy = {task.exitStatus == 255 ? 'ignore' : 'retry'}
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.centrifuge" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.centrifuge" }
publishDir = [
path: { "${params.outdir}/centrifuge/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt}'
]
}
withName: KAIJU_KAIJU {
publishDir = [
path: { "${params.outdir}/kaiju/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.tsv'
]
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
}
withName: KAIJU_KAIJU2TABLE {
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/kaiju/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt}'
]
}
withName: KAIJU_KAIJU2KRONA {
ext.args = '-v -u'
}
withName: DIAMOND_BLASTX {
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/diamond/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{blast,xml,txt,daa,sam,tsv,paf,log}'
]
}
withName: MOTUS_PROFILE {
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/motus/${meta.db_name}" },
mode: params.publish_dir_mode
]
}
withName: MOTUS_MERGE {
publishDir = [
path: { "${params.outdir}/motus/" },
mode: params.publish_dir_mode
]
}
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
@ -38,4 +415,11 @@ process {
]
}
withName: MULTIQC {
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}

View file

@ -22,8 +22,35 @@ params {
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
// Genome references
genome = 'R64-1-1'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_qc = true
perform_longread_qc = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_malt = true
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_motus = false
run_krona = true
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
malt_save_reads = true
kraken2_save_reads = true
centrifuge_save_reads = true
diamond_save_reads = true
}
process {
withName: MALT_RUN {
maxForks = 1
}
withName: MEGAN_RMA2INFO {
maxForks = 1
}
}

42
conf/test_motus.config Normal file
View file

@ -0,0 +1,42 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
params {
config_profile_name = 'mOTUs Test profile'
config_profile_description = 'Minimal test to check mOTUs function'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'database_motus.csv'
perform_shortread_clipmerge = false
perform_longread_clip = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_motus = true
run_profile_standardisation = true
}

View file

@ -0,0 +1,48 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset skipping all preprocessing to check pipeline function'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_qc = false
perform_longread_qc = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_malt = true
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_motus = false
run_krona = true
}
process {
withName: MALT_RUN {
maxForks = 1
}
}

View file

@ -0,0 +1,47 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset without performing any profiling to check pipeline function'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_qc = true
perform_longread_qc = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_motus = false
}
process {
withName: MALT_RUN {
maxForks = 1
}
}

47
conf/test_nothing.config Normal file
View file

@ -0,0 +1,47 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset without performing any preprocessing nor profiling to check pipeline function. Useful when you only wish to test a single profiler without having to 'opt-out' of all the others'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_qc = false
perform_longread_qc = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_motus = false
}
process {
withName: MALT_RUN {
maxForks = 1
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 KiB

View file

@ -0,0 +1,444 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="160mm"
height="120mm"
viewBox="0 0 160 120"
version="1.1"
id="svg5581"
inkscape:version="1.2 (1:1.2+202205241504+da316b6974)"
sodipodi:docname="nf-core-taxprofiler_icon.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview5583"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="mm"
showgrid="true"
inkscape:zoom="0.41291035"
inkscape:cx="541.27972"
inkscape:cy="445.61731"
inkscape:window-width="1920"
inkscape:window-height="1043"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="layer1">
<inkscape:grid
type="xygrid"
id="grid6096"
originx="-7.7520637"
originy="-7.8991986" />
</sodipodi:namedview>
<defs
id="defs5578">
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient30057"
x1="10.213824"
y1="221.42242"
x2="218.95003"
y2="221.42242"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.6691607,0.01747377,-0.01751914,0.67089835,5.6293239,-100.28017)" />
<linearGradient
x1="0"
y1="0"
x2="1"
y2="0"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(37.87862,-29.594021,-29.594021,-37.87862,275.46292,136.24821)"
spreadMethod="pad"
id="linearGradient21662">
<stop
style="stop-opacity:1;stop-color:#000000"
offset="0"
id="stop21652" />
<stop
style="stop-opacity:1;stop-color:#0c542a"
offset="0.214724"
id="stop21654" />
<stop
style="stop-opacity:1;stop-color:#25af64"
offset="0.84662598"
id="stop21656" />
<stop
style="stop-opacity:1;stop-color:#25af64"
offset="0.846626"
id="stop21658" />
<stop
style="stop-opacity:1;stop-color:#25af64"
offset="1"
id="stop21660" />
</linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient2708"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174"
gradientUnits="userSpaceOnUse" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6027"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6029"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6031"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6033"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6035"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6037"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6039"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6041"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6043"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6045"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6047"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6049"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6051"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6053"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6055"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6057"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6059"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6061"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6063"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6065"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6067"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6069"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
</defs>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-7.7520638,-7.8991986)">
<g
id="g6123"
transform="translate(7.3175494,9.6409068)">
<path
id="path30012-5"
style="fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
d="M 9.5900834,8.8480637 C 7.5556439,12.028833 7.7408039,16.123943 7.7687039,19.769833 c 0.83412,6.65305 4.1623501,13.08043 9.6588001,17.05639 2.95411,2.45491 6.166,4.56934 9.30414,6.77667 -1.90794,4.17676 -2.98236,9.29825 -0.29786,13.40918 1.67713,3.72229 4.84583,6.89989 5.31985,11.06218 -0.73221,2.86643 -3.54676,6.3663 -0.97712,9.12477 2.12262,2.77099 4.06395,5.64907 5.35322,8.82231 2.02674,3.35439 6.5976,4.09759 10.20366,3.73748 2.47738,-0.18007 4.72202,-2.67715 3.28025,-5.08004 -1.18247,-2.55998 -4.28744,-2.18411 -6.38635,-3.25125 -1.36801,-2.44734 -3.00653,-5.99123 -0.26124,-8.17889 2.5926,-2.70872 4.69882,-5.81991 6.94515,-8.77398 2.71899,-1.24434 5.98816,-0.0654 8.84595,0.2859 10.88656,2.48435 21.31368,6.88721 32.36055,8.64773 3.13924,0.41138 6.76175,-0.45373 9.579516,0.92166 1.40942,4.43747 2.34224,9.45414 5.76225,12.84528 2.6934,1.93573 7.02228,2.12481 9.53599,-0.13787 1.60636,-2.93172 -1.52253,-5.89536 -4.20194,-6.62585 -0.40071,-2.8223 -0.37969,-5.87179 0.26854,-8.58529 3.77785,-0.24182 7.24094,1.16954 10.61732,2.45295 1.78619,3.3965 5.12259,5.96321 8.91186,6.64761 1.60415,-1.40492 2.56073,1.72858 3.8909,2.50862 2.79709,2.93023 6.8997,5.0038 10.99366,4.75587 3.26301,-0.67749 3.79546,-4.59452 2.47471,-7.17014 -0.80988,-4.2018 -3.22071,-8.59389 -1.2047,-12.82352 1.17357,-5.20598 -1.35199,-10.96325 -5.77146,-13.90702 -4.33733,-3.64436 -10.39077,-3.64653 -14.92742,-6.93614 -10.03223,-6.15469 -21.64077,-9.58338 -33.280326,-11.28456 -12.56653,-0.98847 -25.20371,-0.69003 -37.78225,-1.58411 -7.09295,-0.25502 -14.55087,-1.15633 -21.23347,1.84537 -3.9067,-0.36049 -7.22928,-2.98747 -10.60062,-4.8325 -3.63865,-2.26998 -7.6488,-4.94361 -8.46777,-9.49866 -1.63814,-4.07488 -1.20485,-8.6292 -2.57509,-12.7242393 -0.092,-1.43702 -3.1468606,-2.01015 -3.5173206,-0.42766 z" />
<path
style="fill:url(#linearGradient30057);fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
d="M 10.931314,9.9653233 C 9.7827934,11.308473 9.8484234,13.338583 9.5801634,15.027613 c -0.34115,2.76171 -0.29206,5.58762 0.6731896,8.22888 1.139301,3.87108 2.982591,7.65359 6.139101,10.27078 3.79198,3.54184 8.2401,6.33641 12.43029,9.30118 -0.91535,2.99306 -2.65543,5.87431 -2.30804,9.13471 0.18061,2.677 1.94571,4.91009 3.11856,7.23201 1.50937,2.8256 3.84729,5.54985 3.82514,8.91032 -0.0787,2.49314 -2.48083,4.5082 -1.96562,7.03872 1.67419,2.68138 3.992,4.96942 5.15932,7.96769 0.65556,1.60629 1.4695,3.40539 3.31812,3.87174 2.38126,0.82152 5.41091,1.64045 7.63103,0.0125 0.82635,-1.48261 -1.06902,-2.72955 -2.36736,-2.87342 -1.71199,-0.17589 -3.69466,-0.59568 -4.39228,-2.41244 -0.96281,-2.26721 -2.29358,-4.88743 -1.3576,-7.36922 2.00069,-3.29907 5.15025,-5.72284 7.05316,-9.10416 0.97281,-1.66962 2.43831,-3.35242 4.55042,-3.26353 6.66383,-0.13609 13.02922,2.25844 19.35504,4.0482 6.38637,1.7963 12.67764,4.02096 19.20445,5.25889 3.77887,0.58276 7.72718,-0.18602 11.419706,0.93529 1.63461,0.75428 1.35512,2.9334 2.05535,4.34159 1.1597,3.06423 1.81242,6.5515 4.16823,8.9707 1.76834,1.42588 4.39281,1.36118 6.4399,0.63293 1.20689,-0.70879 -0.0705,-2.295 -0.82735,-2.84018 -0.85353,-0.97431 -2.49109,-0.81793 -3.19991,-1.88904 -0.61703,-3.21399 -0.48223,-6.57875 0.0181,-9.80404 0.2319,-1.63989 2.04068,-2.10159 3.44204,-1.83986 3.30625,0.10127 6.50718,1.46919 9.55674,2.3424 -0.71254,-1.81326 -1.19119,-3.75193 -0.97348,-5.72815 0.0204,-1.69846 0.15688,-3.58482 1.5917,-4.73058 1.83011,-1.86627 3.60232,-3.85815 5.70315,-5.41088 0.57552,0.16274 -0.36083,0.85452 -0.4987,1.09386 -1.90807,2.09201 -4.24787,3.84191 -5.83093,6.20217 -0.46736,1.92903 -0.41493,3.99677 -0.24575,5.96645 0.84833,2.57726 2.1706,5.12487 4.1346,7.01585 1.36224,0.87656 2.75795,2.00764 4.37729,2.2713 1.6213,-0.74843 1.95011,-2.85006 2.55124,-4.35962 1.10913,-3.99969 1.90782,-8.08534 2.71945,-12.15319 0.1702,-1.75995 0.75519,-4.04477 -0.9398,-5.26766 -0.66443,-0.31636 -0.15084,-1.05324 0.36147,-0.52827 2.02638,1.51086 1.30614,4.35901 1.06011,6.49585 -0.85333,4.26646 -1.66477,8.56551 -2.96213,12.72302 -0.11797,0.91577 -1.34029,1.9982 -0.53389,2.72401 2.58751,2.57362 4.81242,5.92113 8.5085,6.96547 1.59827,0.50471 3.80045,1.66945 5.18808,0.14586 1.17766,-1.4017 -0.0976,-3.1877 -0.28432,-4.72092 -0.75396,-3.29433 -2.15674,-6.56045 -1.96438,-9.99106 0.37367,-2.10533 1.65495,-4.11536 1.02167,-6.32262 -0.34992,-2.84568 -1.45335,-5.69952 -3.77909,-7.50012 -2.77154,-2.67603 -6.40479,-4.14933 -10.09936,-5.01092 -3.81561,-1.24593 -6.9919,-3.83431 -10.61749,-5.50364 -6.72466,-3.69962 -14.2084,-5.65169 -21.615834,-7.44474 -8.303242,-1.93229 -16.888842,-1.47941 -25.343602,-1.84797 -8.46313,-0.1352 -16.90366,-0.80082 -25.3617,-1.05107 -4.38737,-0.20312 -8.92121,-0.0523 -12.99753,1.77349 -2.83378,1.235 -5.55113,-0.83445 -8.06653,-1.90121 -3.95435,-2.11512 -8.20653,-4.63235 -11.45036,-7.81607 -1.44093,-1.52832 -1.98381,-3.13498 -2.61394,-4.93556 -0.98461,-2.81345 -1.62386,-5.97312 -1.9462,-9.06999 -0.24149,-1.39646 -0.0764,-3.01443 -0.81326,-4.2426397 -0.0219,-0.007 -0.045,-0.0107 -0.0678,-0.007 z"
id="path30012"
sodipodi:nodetypes="cccccccccccccccccccccccccccccccccccccccccccccccccccccccccscccc"
inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
inkscape:export-xdpi="159"
inkscape:export-ydpi="159" />
<path
id="path1218-5-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 36.798094,98.985353 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637147 7.0058785,6.6371479 0 0 1 -7.00589,-6.637147 7.0058785,6.6371479 0 0 1 7.00589,-6.63713 7.0058785,6.6371479 0 0 1 7.00588,6.63713 z" />
<path
id="path1218-8-0"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 48.458974,96.857863 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
<path
id="path1218-4-46"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 59.481564,99.639333 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637157 7.0058789,6.6371479 0 0 1 -7.00588,-6.637157 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
<path
id="path1218-7-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 71.168234,96.889183 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637157 7.0058785,6.6371479 0 0 1 -7.00588,-6.637157 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00589,6.63715 z" />
<path
id="path1218-1-6"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 82.714904,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00587,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00587,6.63714 z" />
<path
id="path1218-73-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 93.813184,96.584163 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00587,-6.637147 7.0058789,6.6371479 0 0 1 7.00587,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
<path
id="path1218-5-9-3"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 104.85924,98.601273 a 7.0058789,6.6371479 0 0 1 -7.005886,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63716 7.0058789,6.6371479 0 0 1 7.005886,6.63716 z" />
<path
id="path1218-8-7-8"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 116.52015,96.473773 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637127 7.0058785,6.6371479 0 0 1 -7.00588,-6.637127 7.0058785,6.6371479 0 0 1 7.00588,-6.63714 7.0058785,6.6371479 0 0 1 7.00589,6.63714 z" />
<path
id="path1218-4-7-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 127.54274,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00589,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00589,6.63714 z" />
<path
id="path1218-7-9-00"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 139.22941,96.505103 a 7.0058785,6.6371479 0 0 1 -7.00587,6.637147 7.0058785,6.6371479 0 0 1 -7.00588,-6.637147 7.0058785,6.6371479 0 0 1 7.00588,-6.63716 7.0058785,6.6371479 0 0 1 7.00587,6.63716 z" />
<path
id="path1218-1-5-9"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 150.77607,98.871153 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637137 7.0058785,6.6371479 0 0 1 -7.00588,-6.637137 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00588,6.63715 z" />
<g
id="g2682"
style="fill:url(#linearGradient2708);fill-opacity:1;stroke:url(#linearGradient2708)"
inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
inkscape:export-xdpi="159"
inkscape:export-ydpi="159"
transform="matrix(3.7156967,0,0,3.7156967,-30.828156,-200.55501)">
<ellipse
style="fill:url(#linearGradient6027);fill-opacity:1;stroke:url(#linearGradient6029);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-5"
cx="16.378149"
cy="80.642143"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6031);fill-opacity:1;stroke:url(#linearGradient6033);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-8"
cx="19.516428"
cy="80.069572"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6035);fill-opacity:1;stroke:url(#linearGradient6037);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-4"
cx="22.48292"
cy="80.818146"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6039);fill-opacity:1;stroke:url(#linearGradient6041);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-7"
cx="25.628136"
cy="80.078003"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6043);fill-opacity:1;stroke:url(#linearGradient6045);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-1"
cx="28.735676"
cy="80.714775"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6047);fill-opacity:1;stroke:url(#linearGradient6049);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-73"
cx="31.72254"
cy="79.995911"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6051);fill-opacity:1;stroke:url(#linearGradient6053);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-5-9"
cx="34.695347"
cy="80.538773"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6055);fill-opacity:1;stroke:url(#linearGradient6057);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-8-7"
cx="37.83363"
cy="79.966202"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6059);fill-opacity:1;stroke:url(#linearGradient6061);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-4-7"
cx="40.800121"
cy="80.714775"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6063);fill-opacity:1;stroke:url(#linearGradient6065);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-7-9"
cx="43.945339"
cy="79.974632"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6067);fill-opacity:1;stroke:url(#linearGradient6069);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-1-5"
cx="47.052876"
cy="80.611404"
rx="1.4964142"
ry="1.4176555" />
</g>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 22 KiB

View file

@ -0,0 +1,445 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="180mm"
height="180mm"
viewBox="0 0 180 180"
version="1.1"
id="svg5581"
inkscape:version="1.2 (1:1.2+202206011327+fc4e4096c5)"
sodipodi:docname="nf-core-taxprofiler_icon_border.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview5583"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="mm"
showgrid="true"
inkscape:zoom="0.41291035"
inkscape:cx="449.25006"
inkscape:cy="446.82823"
inkscape:window-width="1920"
inkscape:window-height="1016"
inkscape:window-x="784"
inkscape:window-y="1107"
inkscape:window-maximized="1"
inkscape:current-layer="layer1">
<inkscape:grid
type="xygrid"
id="grid6096"
originx="-7.7520638"
originy="-7.8991984"
dotted="true" />
</sodipodi:namedview>
<defs
id="defs5578">
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient30057"
x1="10.213824"
y1="221.42242"
x2="218.95003"
y2="221.42242"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.6691607,0.01747377,-0.01751914,0.67089835,5.6293239,-100.28017)" />
<linearGradient
x1="0"
y1="0"
x2="1"
y2="0"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(37.87862,-29.594021,-29.594021,-37.87862,275.46292,136.24821)"
spreadMethod="pad"
id="linearGradient21662">
<stop
style="stop-opacity:1;stop-color:#000000"
offset="0"
id="stop21652" />
<stop
style="stop-opacity:1;stop-color:#0c542a"
offset="0.214724"
id="stop21654" />
<stop
style="stop-opacity:1;stop-color:#25af64"
offset="0.84662598"
id="stop21656" />
<stop
style="stop-opacity:1;stop-color:#25af64"
offset="0.846626"
id="stop21658" />
<stop
style="stop-opacity:1;stop-color:#25af64"
offset="1"
id="stop21660" />
</linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient2708"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174"
gradientUnits="userSpaceOnUse" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6027"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6029"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6031"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6033"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6035"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6037"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6039"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6041"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6043"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6045"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6047"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6049"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6051"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6053"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6055"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6057"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6059"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6061"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6063"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6065"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6067"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient21662"
id="linearGradient6069"
gradientUnits="userSpaceOnUse"
x1="14.381735"
y1="80.392174"
x2="49.04929"
y2="80.392174" />
</defs>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-7.7520638,-7.8991986)">
<g
id="g6123"
transform="translate(17.317549,39.640907)">
<path
id="path30012-5"
style="fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
d="M 9.5900834,8.8480637 C 7.5556439,12.028833 7.7408039,16.123943 7.7687039,19.769833 c 0.83412,6.65305 4.1623501,13.08043 9.6588001,17.05639 2.95411,2.45491 6.166,4.56934 9.30414,6.77667 -1.90794,4.17676 -2.98236,9.29825 -0.29786,13.40918 1.67713,3.72229 4.84583,6.89989 5.31985,11.06218 -0.73221,2.86643 -3.54676,6.3663 -0.97712,9.12477 2.12262,2.77099 4.06395,5.64907 5.35322,8.82231 2.02674,3.35439 6.5976,4.09759 10.20366,3.73748 2.47738,-0.18007 4.72202,-2.67715 3.28025,-5.08004 -1.18247,-2.55998 -4.28744,-2.18411 -6.38635,-3.25125 -1.36801,-2.44734 -3.00653,-5.99123 -0.26124,-8.17889 2.5926,-2.70872 4.69882,-5.81991 6.94515,-8.77398 2.71899,-1.24434 5.98816,-0.0654 8.84595,0.2859 10.88656,2.48435 21.31368,6.88721 32.36055,8.64773 3.13924,0.41138 6.76175,-0.45373 9.579516,0.92166 1.40942,4.43747 2.34224,9.45414 5.76225,12.84528 2.6934,1.93573 7.02228,2.12481 9.53599,-0.13787 1.60636,-2.93172 -1.52253,-5.89536 -4.20194,-6.62585 -0.40071,-2.8223 -0.37969,-5.87179 0.26854,-8.58529 3.77785,-0.24182 7.24094,1.16954 10.61732,2.45295 1.78619,3.3965 5.12259,5.96321 8.91186,6.64761 1.60415,-1.40492 2.56073,1.72858 3.8909,2.50862 2.79709,2.93023 6.8997,5.0038 10.99366,4.75587 3.26301,-0.67749 3.79546,-4.59452 2.47471,-7.17014 -0.80988,-4.2018 -3.22071,-8.59389 -1.2047,-12.82352 1.17357,-5.20598 -1.35199,-10.96325 -5.77146,-13.90702 -4.33733,-3.64436 -10.39077,-3.64653 -14.92742,-6.93614 -10.03223,-6.15469 -21.64077,-9.58338 -33.280326,-11.28456 -12.56653,-0.98847 -25.20371,-0.69003 -37.78225,-1.58411 -7.09295,-0.25502 -14.55087,-1.15633 -21.23347,1.84537 -3.9067,-0.36049 -7.22928,-2.98747 -10.60062,-4.8325 -3.63865,-2.26998 -7.6488,-4.94361 -8.46777,-9.49866 -1.63814,-4.07488 -1.20485,-8.6292 -2.57509,-12.7242393 -0.092,-1.43702 -3.1468606,-2.01015 -3.5173206,-0.42766 z" />
<path
style="fill:url(#linearGradient30057);fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
d="M 10.931314,9.9653233 C 9.7827934,11.308473 9.8484234,13.338583 9.5801634,15.027613 c -0.34115,2.76171 -0.29206,5.58762 0.6731896,8.22888 1.139301,3.87108 2.982591,7.65359 6.139101,10.27078 3.79198,3.54184 8.2401,6.33641 12.43029,9.30118 -0.91535,2.99306 -2.65543,5.87431 -2.30804,9.13471 0.18061,2.677 1.94571,4.91009 3.11856,7.23201 1.50937,2.8256 3.84729,5.54985 3.82514,8.91032 -0.0787,2.49314 -2.48083,4.5082 -1.96562,7.03872 1.67419,2.68138 3.992,4.96942 5.15932,7.96769 0.65556,1.60629 1.4695,3.40539 3.31812,3.87174 2.38126,0.82152 5.41091,1.64045 7.63103,0.0125 0.82635,-1.48261 -1.06902,-2.72955 -2.36736,-2.87342 -1.71199,-0.17589 -3.69466,-0.59568 -4.39228,-2.41244 -0.96281,-2.26721 -2.29358,-4.88743 -1.3576,-7.36922 2.00069,-3.29907 5.15025,-5.72284 7.05316,-9.10416 0.97281,-1.66962 2.43831,-3.35242 4.55042,-3.26353 6.66383,-0.13609 13.02922,2.25844 19.35504,4.0482 6.38637,1.7963 12.67764,4.02096 19.20445,5.25889 3.77887,0.58276 7.72718,-0.18602 11.419706,0.93529 1.63461,0.75428 1.35512,2.9334 2.05535,4.34159 1.1597,3.06423 1.81242,6.5515 4.16823,8.9707 1.76834,1.42588 4.39281,1.36118 6.4399,0.63293 1.20689,-0.70879 -0.0705,-2.295 -0.82735,-2.84018 -0.85353,-0.97431 -2.49109,-0.81793 -3.19991,-1.88904 -0.61703,-3.21399 -0.48223,-6.57875 0.0181,-9.80404 0.2319,-1.63989 2.04068,-2.10159 3.44204,-1.83986 3.30625,0.10127 6.50718,1.46919 9.55674,2.3424 -0.71254,-1.81326 -1.19119,-3.75193 -0.97348,-5.72815 0.0204,-1.69846 0.15688,-3.58482 1.5917,-4.73058 1.83011,-1.86627 3.60232,-3.85815 5.70315,-5.41088 0.57552,0.16274 -0.36083,0.85452 -0.4987,1.09386 -1.90807,2.09201 -4.24787,3.84191 -5.83093,6.20217 -0.46736,1.92903 -0.41493,3.99677 -0.24575,5.96645 0.84833,2.57726 2.1706,5.12487 4.1346,7.01585 1.36224,0.87656 2.75795,2.00764 4.37729,2.2713 1.6213,-0.74843 1.95011,-2.85006 2.55124,-4.35962 1.10913,-3.99969 1.90782,-8.08534 2.71945,-12.15319 0.1702,-1.75995 0.75519,-4.04477 -0.9398,-5.26766 -0.66443,-0.31636 -0.15084,-1.05324 0.36147,-0.52827 2.02638,1.51086 1.30614,4.35901 1.06011,6.49585 -0.85333,4.26646 -1.66477,8.56551 -2.96213,12.72302 -0.11797,0.91577 -1.34029,1.9982 -0.53389,2.72401 2.58751,2.57362 4.81242,5.92113 8.5085,6.96547 1.59827,0.50471 3.80045,1.66945 5.18808,0.14586 1.17766,-1.4017 -0.0976,-3.1877 -0.28432,-4.72092 -0.75396,-3.29433 -2.15674,-6.56045 -1.96438,-9.99106 0.37367,-2.10533 1.65495,-4.11536 1.02167,-6.32262 -0.34992,-2.84568 -1.45335,-5.69952 -3.77909,-7.50012 -2.77154,-2.67603 -6.40479,-4.14933 -10.09936,-5.01092 -3.81561,-1.24593 -6.9919,-3.83431 -10.61749,-5.50364 -6.72466,-3.69962 -14.2084,-5.65169 -21.615834,-7.44474 -8.303242,-1.93229 -16.888842,-1.47941 -25.343602,-1.84797 -8.46313,-0.1352 -16.90366,-0.80082 -25.3617,-1.05107 -4.38737,-0.20312 -8.92121,-0.0523 -12.99753,1.77349 -2.83378,1.235 -5.55113,-0.83445 -8.06653,-1.90121 -3.95435,-2.11512 -8.20653,-4.63235 -11.45036,-7.81607 -1.44093,-1.52832 -1.98381,-3.13498 -2.61394,-4.93556 -0.98461,-2.81345 -1.62386,-5.97312 -1.9462,-9.06999 -0.24149,-1.39646 -0.0764,-3.01443 -0.81326,-4.2426397 -0.0219,-0.007 -0.045,-0.0107 -0.0678,-0.007 z"
id="path30012"
sodipodi:nodetypes="cccccccccccccccccccccccccccccccccccccccccccccccccccccccccscccc"
inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
inkscape:export-xdpi="159"
inkscape:export-ydpi="159" />
<path
id="path1218-5-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 36.798094,98.985353 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637147 7.0058785,6.6371479 0 0 1 -7.00589,-6.637147 7.0058785,6.6371479 0 0 1 7.00589,-6.63713 7.0058785,6.6371479 0 0 1 7.00588,6.63713 z" />
<path
id="path1218-8-0"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 48.458974,96.857863 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
<path
id="path1218-4-46"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 59.481564,99.639333 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637157 7.0058789,6.6371479 0 0 1 -7.00588,-6.637157 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
<path
id="path1218-7-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 71.168234,96.889183 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637157 7.0058785,6.6371479 0 0 1 -7.00588,-6.637157 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00589,6.63715 z" />
<path
id="path1218-1-6"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 82.714904,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00587,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00587,6.63714 z" />
<path
id="path1218-73-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 93.813184,96.584163 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00587,-6.637147 7.0058789,6.6371479 0 0 1 7.00587,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
<path
id="path1218-5-9-3"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 104.85924,98.601273 a 7.0058789,6.6371479 0 0 1 -7.005886,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63716 7.0058789,6.6371479 0 0 1 7.005886,6.63716 z" />
<path
id="path1218-8-7-8"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 116.52015,96.473773 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637127 7.0058785,6.6371479 0 0 1 -7.00588,-6.637127 7.0058785,6.6371479 0 0 1 7.00588,-6.63714 7.0058785,6.6371479 0 0 1 7.00589,6.63714 z" />
<path
id="path1218-4-7-4"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 127.54274,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00589,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00589,6.63714 z" />
<path
id="path1218-7-9-00"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 139.22941,96.505103 a 7.0058785,6.6371479 0 0 1 -7.00587,6.637147 7.0058785,6.6371479 0 0 1 -7.00588,-6.637147 7.0058785,6.6371479 0 0 1 7.00588,-6.63716 7.0058785,6.6371479 0 0 1 7.00587,6.63716 z" />
<path
id="path1218-1-5-9"
style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
d="m 150.77607,98.871153 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637137 7.0058785,6.6371479 0 0 1 -7.00588,-6.637137 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00588,6.63715 z" />
<g
id="g2682"
style="fill:url(#linearGradient2708);fill-opacity:1;stroke:url(#linearGradient2708)"
inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
inkscape:export-xdpi="159"
inkscape:export-ydpi="159"
transform="matrix(3.7156967,0,0,3.7156967,-30.828156,-200.55501)">
<ellipse
style="fill:url(#linearGradient6027);fill-opacity:1;stroke:url(#linearGradient6029);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-5"
cx="16.378149"
cy="80.642143"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6031);fill-opacity:1;stroke:url(#linearGradient6033);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-8"
cx="19.516428"
cy="80.069572"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6035);fill-opacity:1;stroke:url(#linearGradient6037);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-4"
cx="22.48292"
cy="80.818146"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6039);fill-opacity:1;stroke:url(#linearGradient6041);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-7"
cx="25.628136"
cy="80.078003"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6043);fill-opacity:1;stroke:url(#linearGradient6045);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-1"
cx="28.735676"
cy="80.714775"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6047);fill-opacity:1;stroke:url(#linearGradient6049);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-73"
cx="31.72254"
cy="79.995911"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6051);fill-opacity:1;stroke:url(#linearGradient6053);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-5-9"
cx="34.695347"
cy="80.538773"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6055);fill-opacity:1;stroke:url(#linearGradient6057);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-8-7"
cx="37.83363"
cy="79.966202"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6059);fill-opacity:1;stroke:url(#linearGradient6061);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-4-7"
cx="40.800121"
cy="80.714775"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6063);fill-opacity:1;stroke:url(#linearGradient6065);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-7-9"
cx="43.945339"
cy="79.974632"
rx="1.4964142"
ry="1.4176555" />
<ellipse
style="fill:url(#linearGradient6067);fill-opacity:1;stroke:url(#linearGradient6069);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
id="path1218-1-5"
cx="47.052876"
cy="80.611404"
rx="1.4964142"
ry="1.4176555" />
</g>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 389 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 433 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 139 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 124 KiB

View file

@ -8,56 +8,146 @@
<!-- TODO nf-core: Add documentation about anything specific to running your pipeline. For general topics, please point to (and add to) the main nf-core website. -->
## Samplesheet input
## Samplesheet inputs
You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).
```bash
--input '[path to samplesheet file]'
> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended)
You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermother, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below.
This samplesheet is then specified on the command line as follows:
```console
--input '[path to samplesheet file]' --databases '[path to database sheet file]'
```
### Multiple runs of the same sample
The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate different runs FASTQ files of the same sample before performing profiling, when `--perform_runmerging` is supplied. Below is an example for the same sample sequenced across 3 lanes:
```console
sample,fastq_1,fastq_2
CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2612,run1,ILLUMINA,2612_run1_R1.fq.gz,,
2612,run2,ILLUMINA,2612_run2_R1.fq.gz,,
2612,run3,ILLUMINA,2612_run3_R1.fq.gz,2612_run3_R2.fq.gz,
```
> ⚠️ Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `run3` will be profiled independently of `run1` and `run2` if pairs are not merged.
### Full samplesheet
The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 6 columns to match those defined in the table below.
A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
A final samplesheet file consisting of both single- and paired-end data, as well as long-read FASTA files may look something like the one below. This is for 6 samples, where `2612` has been sequenced twice.
```console
sample,fastq_1,fastq_2
CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
2611,ERR5766174,ILLUMINA,,,/<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
2612,ERR5766176,ILLUMINA,/<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz,/<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz,
2612,ERR5766180,ILLUMINA,/<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
2613,ERR5766181,ILLUMINA,/<path>/<to>/fastq/ERX5474937_ERR5766181_1.fastq.gz,/<path>/<to>/fastq/ERX5474937_ERR5766181_2.fastq.gz,
ERR3201952,ERR3201952,OXFORD_NANOPORE,/<path>/<to>/fastq/ERR3201952.fastq.gz,,
```
| Column | Description |
| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
| Column | Description |
| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `sample` | Unique sample name [required]. |
| `run_accession` | Run ID or name unique for each (pairs of) file(s) .Can also supply sample name again here, if only a single run was generated [required]. |
| `instrument_platform` | Sequencing platform reads generated on, selected from the EBI ENA [controlled vocabulary](https://www.ebi.ac.uk/ena/portal/api/controlledVocab?field=instrument_platform) [required]. |
| `fastq_1` | Path or URL to sequencing reads or for Illumina R1 sequencing reads in FASTQ format. GZipped compressed files accepted. Can be left empty if data in FASTA is specifed. Cannot be combined with `fasta`. |
| `fastq_2` | Path or URL to Illumina R2 sequencing reads in FASTQ format. GZipped compressed files accepted. Can be left empty if single end data. Cannot be combined with `fasta`. |
| `fasta` | Path or URL to long-reads or contigs in FASTA format. GZipped compressed files accepted. Can be left empty if data in FASTA is specifed. Cannot be combined with `fastq_1` or `fastq_2`. |
An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
### Full database sheet
nf-core/taxprofiler supports multiple databases being profiled in parallel for each tool.
Databases can be supplied either in the form of a compressed `.tar.gz` archive of a directory containing all relevant database files or the path to a directory on the filesystem.
The pipeline takes the locations and specific parameters of these databases as input via a four column comma-separated sheet.
> ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files.
An example database sheet can look as follows, where 4 tools are being used, and `malt` and `kraken2` will be used against two databases each.
```console
tool,db_name,db_params,db_path
malt,malt85,-id 85,/<path>/<to>/malt/testdb-malt/
malt,malt95,-id 90,/<path>/<to>/malt/testdb-malt.tar.gz
kraken2,db1,,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
motus,db_mOTU,,/<path>/<to>/motus/motus_database/
```
Column specifications are as follows:
| Column | Description |
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tool` | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. |
| `db_name` | A unique name of the particular database [required]. |
| `db_params` | Any parameters of the given taxonomic profiler that you wish to specify that the taxonomic profiling tool should use when profiling against this specific. Can be empty to use taxonomic profiler defaults Must not be surrounded by quotes [required]. |
| `db_path` | Path to the database. Can either be a path to a directory containing the database index files or a `.tar.gz` file which contains the compressed database directory with the same name as the tar archive, minus `.tar.gz` [required]. |
> 💡 You can also specify the same database directory/file twice (ensuring unique `db_name`s) and specify different parameters for each database to compare the effect of different parameters during profiling.
nf-core/taxprofiler will automatically decompress and extract any compressed archives for you.
Expected (uncompressed) database files for each tool are as follows:
- **MALT** output of `malt-build`. A directory containing:
- `ref.idx`
- `taxonomy.idx`
- `taxonomy.map`
- `index0.idx`
- `table0.idx`
- `table0.db`
- `ref.inf`
- `ref.db`
- `taxonomy.tre`
- **Kraken2** output of `kraken2-build` command(s) A directory containing:
- `opts.k2d`
- `hash.k2d`
- `taxo.k2d`
- **Centrifuge** output of `centrifuge-build`. A directory containing:
- `<database_name>.<number>.cf`
- `<database_name>.<number>.cf`
- `<database_name>.<number>.cf`
- `<database_name>.<number>.cf`
- **MetaPhlAn3** generated with `metaphlan --install` or downloaded from links on the [MetaPhlAn3 wiki](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#customizing-the-database). A directory containing:
- `mpa_v30_CHOCOPhlAn_201901.pkl`
- `mpa_v30_CHOCOPhlAn_201901.pkl`
- `mpa_v30_CHOCOPhlAn_201901.fasta`
- `mpa_v30_CHOCOPhlAn_201901.3.bt2`
- `mpa_v30_CHOCOPhlAn_201901.4.bt2`
- `mpa_v30_CHOCOPhlAn_201901.1.bt2`
- `mpa_v30_CHOCOPhlAn_201901.2.bt2`
- `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
- `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
- `mpa_latest`
- **Kaiju** output of `kaiju-makedb`. A directory containing:
- `kaiju_db_*.fmi`
- `nodes.dmp`
- `names.dmp`
- **DIAMOND** output of `diamond makedb`. Note: requires building with taxonomy files
to generate taxonomic profile. See [DIAMOND documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options). A file named:
- `<database_name>.dmnd`
- **mOTUs** is composed of code and database together. The mOTUs tools
[`downloadDB`](https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py)
is used to prepare the mOTUs database and create a file with the version information.
The database download step can be time consuming and the database will be consisting
with same release version of the mOTUs tools. The database for same version tools
can be thus reused for multiple runs. Users can download the database once using the script above and
specify the path the database to the TSV table provided to `--databases`.
## Running the pipeline
The typical command for running the pipeline is as follows:
```bash
nextflow run nf-core/taxprofiler --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
```console
nextflow run nf-core/taxprofiler --input samplesheet.csv --databases databases.csv --outdir <OUTDIR> -profile docker --run_<TOOL1> --run_<TOOL2>
```
This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@ -71,6 +161,72 @@ work # Directory containing the nextflow working files
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
```
### Preprocessing Steps
nf-core/taxprofiler offers four main preprocessing steps
- Read processing: adapter clipping and pair-merging.
- Complexity filtering: removal of low-sequence complexity reads.
- Host read-removal: removal of reads aligning to reference genome(s) of a host.
- Run merging: concatenation of multiple FASTQ chunks/sequencing runs/libraries of a sample.
#### Read Processing
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`)
By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`).
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
There is currently one option for long-read Oxford Nanopore processing: `porechop`.
For both short-read and long-read preprocessing, you can optionally save the resulting processed reads with `--save_preprocessed_reads`.
#### Complexity Filtering
Complexity filtering can be activated via the `--perform_shortread_complexityfilter` flag.
Complexity filtering is primarily a run-time optimisation step. It is not necessary for accurate taxonomic profiling, however it can speed up run-time of each tool by removing reads with low-diversity of nucleotides (e.g. with mono-nucleotide - `AAAAAAAA`, or di-nucleotide repeats `GAGAGAGAGAGAGAG`) that have a low-chance of giving an informative taxonomic ID as they can be associated with many different taxa. Removing these reads therefore saves computational time and resources.
There are currently three options for short-read complexity filtering: [`bbduk`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/), [`prinseq++`](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus), and [`fastp`](https://github.com/OpenGene/fastp#low-complexity-filter).
The tools offer different algorithms and parameters for removing low complexity reads. We therefore recommend reviewing the pipeline's [parameter documentation](https://nf-co.re/taxprofiler/parameters) and the documentation of the tools (see links above) to decide on optimal methods and parameters for your dataset.
You can optionally save the FASTQ output of the run merging with the `--save_complexityfiltered_reads`. If running with `fastp`, complexity filtering happens inclusively within the earlier shortread preprocessing step. Therefore there will not be an independent pipeline step for complexity filtering, and no independent FASTQ file (i.e. `--save_complexityfiltered_reads` will be ignored) - your complexity filtered reads will also be in the `fastp/` folder in the same file(s) as the preprocessed read.
#### Host Removal
Removal of possible-host reads from FASTQ files prior profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`.
Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these prior typically resource-heavy profiling with more efficient methods. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases.
nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2, and the use of the unaligned reads for downstream profiling.
You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index` or `--longread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used.
> 💡 If you have multiple taxa or sequences you wish to remove (e.g., the host genome and then also PhiX - common quality-control reagent during sequencing) you can simply concatenate the FASTAs of each taxa or sequences into a single reference file.
#### Run Merging
For samples that may have been sequenced over multiple runs, or for FASTQ files split into multiple chunks, you can activate the ability to merge across all runs or chunks with `--perform_runmerging`.
For more information how to set up your input samplesheet, see [Multiple runs of the same sample](#multiple-runs-of-the-same-sample).
Activating this functionality will concatenate the FASTQ files with the same sample name _after_ the optional preprocessing steps and _before_ profiling. Note that libraries with runs of different pairing types will **not** be merged and this will be indicated on output files with a `_se` or `_pe` suffix to the sample name accordingly.
You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.
##### Profiling
###### MALT
nf-core/taxprofiler uses MALT 0.4.1, which is a compatively old version. However it has been found that the most recent version of MALT (0.5.\*), at the time of writing, is broken. [The the LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should be still be compatible with `malt-run` of 0.4.1.
### Updating the pipeline
When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

View file

@ -10,11 +10,11 @@ class WorkflowTaxprofiler {
public static void initialise(params, log) {
genomeExistsError(params, log)
if (!params.fasta) {
log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
System.exit(1)
}
// TODO update as necessary
//if (!params.fasta) {
// log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
// System.exit(1)
//}
}
//

View file

@ -3,20 +3,104 @@
"homePage": "https://github.com/nf-core/taxprofiler",
"repos": {
"nf-core/modules": {
"git_url": "https://github.com/nf-core/modules.git",
"modules": {
"custom/dumpsoftwareversions": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
"branch": "master"
},
"fastqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
"branch": "master"
},
"multiqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
"branch": "master"
}
"adapterremoval": {
"git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a"
},
"bbmap/bbduk": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"bowtie2/align": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"bowtie2/build": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"cat/fastq": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"centrifuge/centrifuge": {
"git_sha": "d2726fcf75063960f06b36d2229a4c0966614108"
},
"centrifuge/kreport": {
"git_sha": "734d0db6079a4aa43b6509b207e5d6feb35d4838"
},
"custom/dumpsoftwareversions": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"diamond/blastx": {
"git_sha": "3531824af826c16cd252bc5aa82ae169b244ebaa"
},
"fastp": {
"git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
},
"fastqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"filtlong": {
"git_sha": "957cb9b83668075f4af101fc99502908cca487e3"
},
"gunzip": {
"git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
},
"kaiju/kaiju": {
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
},
"kaiju/kaiju2krona": {
"git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f"
},
"kaiju/kaiju2table": {
"git_sha": "538dbac98ba9c8f799536cd5a617195501439457"
},
"kraken2/kraken2": {
"git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
},
"krakentools/kreport2krona": {
"git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422"
},
"krona/ktimporttaxonomy": {
"git_sha": "0e9fd9370ad1845870b8a9c63fcc47d999a1739e"
},
"krona/ktimporttext": {
"git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2"
},
"malt/run": {
"git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5"
},
"megan/rma2info": {
"git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
},
"metaphlan3": {
"git_sha": "ed4dd1a928ebf4308efb720de878045f7773f8e2"
},
"minimap2/align": {
"git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b"
},
"minimap2/index": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"motus/merge": {
"git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49"
},
"motus/profile": {
"git_sha": "b6ed584443ad68ac41e6975994139454a4f23c18"
},
"multiqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"porechop": {
"git_sha": "b78e19b9dae3671db2c7d4346fe04452c1debfab"
},
"prinseqplusplus": {
"git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b"
},
"samtools/bam2fq": {
"git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159"
},
"samtools/view": {
"git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce"
},
"untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
}
}
}

View file

@ -0,0 +1,25 @@
process DATABASE_CHECK {
tag "$databasesheet"
conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.8.3' :
'quay.io/biocontainers/python:3.8.3' }"
input:
path databasesheet
output:
path '*.csv' , emit: csv
path "versions.yml", emit: versions
script: // This script is bundled with the pipeline, in nf-core/taxprofiler/bin/
"""
cat $databasesheet >> database_sheet.valid.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
"""
}

View file

@ -0,0 +1,31 @@
process ENSURE_FASTQ_EXTENSION {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::bash=5.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
'biocontainers/biocontainers:v1.2.0_cv2' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path('*.fastq.gz'), emit: reads
script:
if (meta.single_end) {
fastq = "${reads.baseName}.fastq.gz"
"""
ln -s '${reads}' '${fastq}'
"""
} else {
first = "${reads[0].baseName}.fastq.gz"
second = "${reads[1].baseName}.fastq.gz"
"""
ln -s '${reads[0]}' '${first}'
ln -s '${reads[1]}' '${second}'
"""
}
}

View file

@ -0,0 +1,40 @@
process KRONA_CLEANUP {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
'biocontainers/biocontainers:v1.2.0_cv1' }"
input:
tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')
output:
tuple val(meta), path("*.txt"), emit: txt
path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
script:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
# Copy the file to a new name
cp ${krona} ${prefix}.txt
# Remove ugly 'x__' prefixes for each of the taxonomic levels
LEVELS=(d k p c o f g s)
for L in "\${LEVELS[@]}"; do
sed -i "s/\${L}__//g" ${prefix}.txt
done
# Remove underscores that are standing in place of spaces
sed -i "s/_/ /g" ${prefix}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
END_VERSIONS
"""
}

View file

@ -0,0 +1,92 @@
process ADAPTERREMOVAL {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"
input:
tuple val(meta), path(reads)
path(adapterlist)
output:
tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated
tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path('*.settings') , emit: settings
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
"""
AdapterRemoval \\
--file1 $reads \\
$args \\
$adapterlist \\
--basename ${prefix} \\
--threads ${task.cpus} \\
--seed 42 \\
--gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
END_VERSIONS
"""
} else {
"""
AdapterRemoval \\
--file1 ${reads[0]} \\
--file2 ${reads[1]} \\
$args \\
$adapterlist \\
--basename ${prefix} \\
--threads $task.cpus \\
--seed 42 \\
--gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
ensure_fastq '${prefix}.pair1.truncated.gz'
ensure_fastq '${prefix}.pair2.truncated.gz'
ensure_fastq '${prefix}.collapsed.gz'
ensure_fastq '${prefix}.collapsed.truncated.gz'
ensure_fastq '${prefix}.paired.gz'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
END_VERSIONS
"""
}
}

View file

@ -0,0 +1,90 @@
name: adapterremoval
description: Trim sequencing adapters and collapse overlapping reads
keywords:
- trimming
- adapters
- merging
- fastq
tools:
- adapterremoval:
description: The AdapterRemoval v2 tool for merging and clipping reads.
homepage: https://github.com/MikkelSchubert/adapterremoval
documentation: https://adapterremoval.readthedocs.io
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
- adapterlist:
type: file
description: Optional text file containing list of adapters to look for for removal
with one adapter per line. Otherwise will look for default adapters (see
AdapterRemoval man page), or can be modified to remove user-specified
adapters via ext.args.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- singles_truncated:
type: file
description: |
Adapter trimmed FastQ files of either single-end reads, or singleton
'orphaned' reads from merging of paired-end data (i.e., one of the pair
was lost due to filtering thresholds).
pattern: "*.truncated.fastq.gz"
- discarded:
type: file
description: |
Adapter trimmed FastQ files of reads that did not pass filtering
thresholds.
pattern: "*.discarded.fastq.gz"
- pair1_truncated:
type: file
description: |
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
with their respective R2 pair due to long templates. The respective pair
is stored in 'pair2_truncated'.
pattern: "*.pair1.truncated.fastq.gz"
- pair2_truncated:
type: file
description: |
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
with their respective R1 pair due to long templates. The respective pair
is stored in 'pair1_truncated'.
pattern: "*.pair2.truncated.fastq.gz"
- collapsed:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair but were not trimmed.
pattern: "*.collapsed.fastq.gz"
- collapsed_truncated:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair and were trimmed of adapter due to sufficient overlap.
pattern: "*.collapsed.truncated.fastq.gz"
- log:
type: file
description: AdapterRemoval log file
pattern: "*.settings"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@maxibor"
- "@jfy133"

View file

@ -0,0 +1,43 @@
process BBMAP_BBDUK {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::bbmap=38.90" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' :
'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }"
input:
tuple val(meta), path(reads)
path contaminants
output:
tuple val(meta), path('*.fastq.gz'), emit: reads
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def raw = meta.single_end ? "in=${reads[0]}" : "in1=${reads[0]} in2=${reads[1]}"
def trimmed = meta.single_end ? "out=${prefix}.fastq.gz" : "out1=${prefix}_1.fastq.gz out2=${prefix}_2.fastq.gz"
def contaminants_fa = contaminants ? "ref=$contaminants" : ''
"""
maxmem=\$(echo \"$task.memory\"| sed 's/ GB/g/g')
bbduk.sh \\
-Xmx\$maxmem \\
$raw \\
$trimmed \\
threads=$task.cpus \\
$args \\
$contaminants_fa \\
&> ${prefix}.bbduk.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bbmap: \$(bbversion.sh)
END_VERSIONS
"""
}

View file

@ -0,0 +1,52 @@
name: bbmap_bbduk
description: Adapter and quality trimming of sequencing reads
keywords:
- trimming
- adapter trimming
- quality trimming
tools:
- bbmap:
description: BBMap is a short read aligner, as well as various other bioinformatic tools.
homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
tool_dev_url: None
doi: ""
licence: ["UC-LBL license (see package)"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- contaminants:
type: file
description: |
Reference files containing adapter and/or contaminant sequences for sequence kmer matching
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: The trimmed/modified fastq reads
pattern: "*fastq.gz"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- log:
type: file
description: Bbduk log file
pattern: "*bbduk.log"
authors:
- "@MGordon09"

View file

@ -0,0 +1,77 @@
process BOWTIE2_ALIGN {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' :
'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }"
input:
tuple val(meta), path(reads)
path index
val save_unaligned
output:
tuple val(meta), path('*.bam') , emit: bam
tuple val(meta), path('*.log') , emit: log
tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
def unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
bowtie2 \\
-x \$INDEX \\
-U $reads \\
--threads $task.cpus \\
$unaligned \\
$args \\
2> ${prefix}.bowtie2.log \\
| samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam -
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
} else {
def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
bowtie2 \\
-x \$INDEX \\
-1 ${reads[0]} \\
-2 ${reads[1]} \\
--threads $task.cpus \\
$unaligned \\
$args \\
2> ${prefix}.bowtie2.log \\
| samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam -
if [ -f ${prefix}.unmapped.fastq.1.gz ]; then
mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz
fi
if [ -f ${prefix}.unmapped.fastq.2.gz ]; then
mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
}
}

View file

@ -0,0 +1,51 @@
name: bowtie2_align
description: Align reads to a reference genome using bowtie2
keywords:
- align
- fasta
- genome
- reference
tools:
- bowtie2:
description: |
Bowtie 2 is an ultrafast and memory-efficient tool for aligning
sequencing reads to long reference sequences.
homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
doi: 10.1038/nmeth.1923
licence: ["GPL-3.0-or-later"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- index:
type: file
description: Bowtie2 genome index files
pattern: "*.ebwt"
output:
- bam:
type: file
description: Output BAM file containing read alignments
pattern: "*.{bam}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- fastq:
type: file
description: Unaligned FastQ files
pattern: "*.fastq.gz"
- log:
type: file
description: Aligment log
pattern: "*.log"
authors:
- "@joseespinosa"
- "@drpatelh"

View file

@ -0,0 +1,30 @@
process BOWTIE2_BUILD {
tag "$fasta"
label 'process_high'
conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' :
'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }"
input:
path fasta
output:
path 'bowtie2' , emit: index
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
"""
mkdir bowtie2
bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
END_VERSIONS
"""
}

View file

@ -0,0 +1,33 @@
name: bowtie2_build
description: Builds bowtie index for reference genome
keywords:
- build
- index
- fasta
- genome
- reference
tools:
- bowtie2:
description: |
Bowtie 2 is an ultrafast and memory-efficient tool for aligning
sequencing reads to long reference sequences.
homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
doi: 10.1038/nmeth.1923
licence: ["GPL-3.0-or-later"]
input:
- fasta:
type: file
description: Input genome fasta file
output:
- index:
type: file
description: Bowtie2 genome index files
pattern: "*.bt2"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@joseespinosa"
- "@drpatelh"

View file

@ -0,0 +1,51 @@
process CAT_FASTQ {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
'biocontainers/biocontainers:v1.2.0_cv1' }"
input:
tuple val(meta), path(reads, stageAs: "input*/*")
output:
tuple val(meta), path("*.merged.fastq.gz"), emit: reads
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def readList = reads.collect{ it.toString() }
if (meta.single_end) {
if (readList.size > 1) {
"""
cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
END_VERSIONS
"""
}
} else {
if (readList.size > 2) {
def read1 = []
def read2 = []
readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
"""
cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
END_VERSIONS
"""
}
}
}

View file

@ -0,0 +1,39 @@
name: cat_fastq
description: Concatenates fastq files
keywords:
- fastq
- concatenate
tools:
- cat:
description: |
The cat utility reads files sequentially, writing them to the standard output.
documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html
licence: ["GPL-3.0-or-later"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: list
description: |
List of input FastQ files to be concatenated.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: Merged fastq file
pattern: "*.{merged.fastq.gz}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@joseespinosa"
- "@drpatelh"

View file

@ -0,0 +1,61 @@
process CENTRIFUGE_CENTRIFUGE {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
input:
tuple val(meta), path(reads)
path db
val save_unaligned
val save_aligned
val sam_format
output:
tuple val(meta), path('*report.txt') , emit: report
tuple val(meta), path('*results.txt') , emit: results
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped
tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
def unaligned = ''
def aligned = ''
if (meta.single_end) {
unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
} else {
unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
}
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
"""
## we add "-no-name ._" to ensure silly Mac OSX metafiles files aren't included
db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'`
centrifuge \\
-x \$db_name \\
-p $task.cpus \\
$paired \\
--report-file ${prefix}.report.txt \\
-S ${prefix}.results.txt \\
$unaligned \\
$aligned \\
$sam_output \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
END_VERSIONS
"""
}

View file

@ -0,0 +1,66 @@
name: centrifuge_centrifuge
description: Classifies metagenomic sequence data
keywords:
- classify
- metagenomics
- fastq
- db
tools:
- centrifuge:
description: Centrifuge is a classifier for metagenomic sequences.
homepage: https://ccb.jhu.edu/software/centrifuge/
documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
doi: 10.1101/gr.210641.116
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- db:
type: directory
description: Path to directory containing centrifuge database files
- save_unaligned:
type: value
description: If true unmapped fastq files are saved
- save_aligned:
type: value
description: If true mapped fastq files are saved
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- report:
type: file
description: |
File containing a classification summary
pattern: "*.{report.txt}"
- results:
type: file
description: |
File containing classification results
pattern: "*.{results.txt}"
- fastq_unmapped:
type: file
description: Unmapped fastq files
pattern: "*.unmapped.fastq.gz"
- fastq_mapped:
type: file
description: Mapped fastq files
pattern: "*.mapped.fastq.gz"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@sofstam"
- "@jfy133"
- "@sateeshperi"

View file

@ -0,0 +1,33 @@
process CENTRIFUGE_KREPORT {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6':
'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
input:
tuple val(meta), path(report)
path db
output:
tuple val(meta), path('*.txt') , emit: kreport
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'`
centrifuge-kreport -x \$db_name ${report} > ${prefix}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
END_VERSIONS
"""
}

View file

@ -0,0 +1,41 @@
name: "centrifuge_kreport"
description: Creates Kraken-style reports from centrifuge out files
keywords:
- metagenomics
tools:
- centrifuge:
description: Centrifuge is a classifier for metagenomic sequences.
homepage: https://ccb.jhu.edu/software/centrifuge/
documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
doi: 10.1101/gr.210641.116
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- report:
type: file
description: File containing the centrifuge classification report
pattern: "*.{txt}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- kreport:
type: file
description: |
File containing kraken-style report from centrifuge
out files.
pattern: "*.{txt}"
authors:
- "@sofstam"
- "@jfy133"

View file

@ -0,0 +1,68 @@
process DIAMOND_BLASTX {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"
input:
tuple val(meta), path(fasta)
path db
val out_ext
val blast_columns
output:
tuple val(meta), path('*.blast'), optional: true, emit: blast
tuple val(meta), path('*.xml') , optional: true, emit: xml
tuple val(meta), path('*.txt') , optional: true, emit: txt
tuple val(meta), path('*.daa') , optional: true, emit: daa
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
tuple val(meta), path('*.paf') , optional: true, emit: paf
tuple val(meta), path("*.log") , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def columns = blast_columns ? "${blast_columns}" : ''
switch ( out_ext ) {
case "blast": outfmt = 0; break
case "xml": outfmt = 5; break
case "txt": outfmt = 6; break
case "daa": outfmt = 100; break
case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break
default:
outfmt = '6';
out_ext = 'txt';
log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
break
}
"""
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
diamond \\
blastx \\
--threads $task.cpus \\
--db \$DB \\
--query $fasta \\
--outfmt ${outfmt} ${columns} \\
$args \\
--out ${prefix}.${out_ext} \\
--log
mv diamond.log ${prefix}.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //')
END_VERSIONS
"""
}

View file

@ -0,0 +1,81 @@
name: diamond_blastx
description: Queries a DIAMOND database using blastx mode
keywords:
- fasta
- diamond
- blastx
- DNA sequence
tools:
- diamond:
description: Accelerated BLAST compatible local sequence aligner
homepage: https://github.com/bbuchfink/diamond
documentation: https://github.com/bbuchfink/diamond/wiki
tool_dev_url: https://github.com/bbuchfink/diamond
doi: "doi:10.1038/s41592-021-01101-x"
licence: ["GPL v3.0"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: Input fasta file containing query sequences
pattern: "*.{fa,fasta}"
- db:
type: directory
description: Directory containing the nucelotide blast database
pattern: "*"
- out_ext:
type: string
description: |
Specify the type of output file to be generated. `blast` corresponds to
BLAST pairwise format. `xml` corresponds to BLAST xml format.
`txt` corresponds to to BLAST tabular format. `tsv` corresponds to
taxonomic classification format.
pattern: "blast|xml|txt|daa|sam|tsv|paf"
output:
- blast:
type: file
description: File containing blastp hits
pattern: "*.{blast}"
- xml:
type: file
description: File containing blastp hits
pattern: "*.{xml}"
- txt:
type: file
description: File containing hits in tabular BLAST format.
pattern: "*.{txt}"
- daa:
type: file
description: File containing hits DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format format
pattern: "*.{paf}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- log:
type: file
description: Log file containing stdout information
pattern: "*.{log}"
authors:
- "@spficklin"
- "@jfy133"
- "@mjamy"

75
modules/nf-core/modules/fastp/main.nf generated Normal file
View file

@ -0,0 +1,75 @@
process FASTP {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' :
'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }"
input:
tuple val(meta), path(reads)
val save_trimmed_fail
val save_merged
output:
tuple val(meta), path('*.trim.fastq.gz') , optional:true, emit: reads
tuple val(meta), path('*.json') , emit: json
tuple val(meta), path('*.html') , emit: html
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
// Added soft-links to original fastqs for consistent naming in MultiQC
def prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : ''
"""
[ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
fastp \\
--in1 ${prefix}.fastq.gz \\
--out1 ${prefix}.trim.fastq.gz \\
--thread $task.cpus \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$fail_fastq \\
$args \\
2> ${prefix}.fastp.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
END_VERSIONS
"""
} else {
def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
"""
[ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
[ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
fastp \\
--in1 ${prefix}_1.fastq.gz \\
--in2 ${prefix}_2.fastq.gz \\
--out1 ${prefix}_1.trim.fastq.gz \\
--out2 ${prefix}_2.trim.fastq.gz \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$fail_fastq \\
$merge_fastq \\
--thread $task.cpus \\
--detect_adapter_for_pe \\
$args \\
2> ${prefix}.fastp.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
END_VERSIONS
"""
}
}

68
modules/nf-core/modules/fastp/meta.yml generated Normal file
View file

@ -0,0 +1,68 @@
name: fastp
description: Perform adapter/quality trimming on sequencing reads
keywords:
- trimming
- quality control
- fastq
tools:
- fastp:
description: |
A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
documentation: https://github.com/OpenGene/fastp
doi: https://doi.org/10.1093/bioinformatics/bty560
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- save_trimmed_fail:
type: boolean
description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
- save_merged:
type: boolean
description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz`
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: The trimmed/modified/unmerged fastq reads
pattern: "*trim.fastq.gz"
- json:
type: file
description: Results in JSON format
pattern: "*.json"
- html:
type: file
description: Results in HTML format
pattern: "*.html"
- log:
type: file
description: fastq log file
pattern: "*.log"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads_fail:
type: file
description: Reads the failed the preprocessing
pattern: "*fail.fastq.gz"
- reads_merged:
type: file
description: Reads that were successfully merged
pattern: "*.{merged.fastq.gz}"
authors:
- "@drpatelh"
- "@kevinmenden"

39
modules/nf-core/modules/filtlong/main.nf generated Normal file
View file

@ -0,0 +1,39 @@
process FILTLONG {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' :
'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }"
input:
tuple val(meta), path(shortreads), path(longreads)
output:
tuple val(meta), path("*.fastq.gz"), emit: reads
tuple val(meta), path("*.log") , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!"
"""
filtlong \\
$short_reads \\
$args \\
$longreads \\
2> ${prefix}.log \\
| gzip -n > ${prefix}.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" )
END_VERSIONS
"""
}

View file

@ -0,0 +1,55 @@
name: filtlong
description: Filtlong filters long reads based on quality measures or short read data.
keywords:
- nanopore
- quality control
- QC
- filtering
- long reads
- short reads
tools:
- filtlong:
description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter.
homepage: https://anaconda.org/bioconda/filtlong
documentation: None
tool_dev_url: https://github.com/rrwick/Filtlong
doi: ""
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- shortreads:
type: file
description: fastq file
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
- longreads:
type: file
description: fastq file
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Filtered (compressed) fastq file
pattern: "*.fastq.gz"
- log:
type: file
description: Standard error logging file containing summary statistics
pattern: "*.log"
authors:
- "@d4straub"
- "@sofstam"

34
modules/nf-core/modules/gunzip/main.nf generated Normal file
View file

@ -0,0 +1,34 @@
process GUNZIP {
tag "$archive"
label 'process_low'
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
'ubuntu:20.04' }"
input:
tuple val(meta), path(archive)
output:
tuple val(meta), path("$gunzip"), emit: gunzip
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
gunzip = archive.toString() - '.gz'
"""
gunzip \\
-f \\
$args \\
$archive
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
END_VERSIONS
"""
}

34
modules/nf-core/modules/gunzip/meta.yml generated Normal file
View file

@ -0,0 +1,34 @@
name: gunzip
description: Compresses and decompresses files.
keywords:
- gunzip
- compression
tools:
- gunzip:
description: |
gzip is a file format and a software application used for file compression and decompression.
documentation: https://www.gnu.org/software/gzip/manual/gzip.html
licence: ["GPL-3.0-or-later"]
input:
- meta:
type: map
description: |
Optional groovy Map containing meta information
e.g. [ id:'test', single_end:false ]
- archive:
type: file
description: File to be compressed/uncompressed
pattern: "*.*"
output:
- gunzip:
type: file
description: Compressed/uncompressed file
pattern: "*.*"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@joseespinosa"
- "@drpatelh"
- "@jfy133"

View file

@ -0,0 +1,41 @@
process KAIJU_KAIJU {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
input:
tuple val(meta), path(reads)
path(db)
output:
tuple val(meta), path('*.tsv'), emit: results
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
"""
dbnodes=`find -L ${db} -name "*nodes.dmp"`
dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
kaiju \\
$args \\
-z $task.cpus \\
-t \$dbnodes \\
-f \$dbname \\
-o ${prefix}.tsv \\
$input
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
END_VERSIONS
"""
}

View file

@ -0,0 +1,53 @@
name: kaiju_kaiju
description: Taxonomic classification of metagenomic sequence data using a protein reference database
keywords:
- classify
- metagenomics
- fastq
- taxonomic profiling
tools:
- kaiju:
description: Fast and sensitive taxonomic classification for metagenomics
homepage: https://kaiju.binf.ku.dk/
documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
tool_dev_url: https://github.com/bioinformatics-centre/kaiju
doi: "10.1038/ncomms11257"
licence: ["GNU GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
respectively.
pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
- db:
type: files
description: |
List containing the database and nodes files for Kaiju
e.g. [ 'database.fmi', 'nodes.dmp' ]
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- results:
type: file
description: Results with taxonomic classification of each read
pattern: "*.tsv"
authors:
- "@talnor"
- "@sofstam"
- "@jfy133"

View file

@ -0,0 +1,39 @@
process KAIJU_KAIJU2KRONA {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
input:
tuple val(meta), path(tsv)
path(db)
output:
tuple val(meta), path("*.txt"), emit: txt
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
dbnodes=`find -L ${db} -name "*nodes.dmp"`
dbnames=`find -L ${db} -name "*names.dmp"`
kaiju2krona \\
$args \\
-t \$dbnodes \\
-n \$dbnames \\
-i ${tsv} \\
-o ${prefix}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
END_VERSIONS
"""
}

View file

@ -0,0 +1,44 @@
name: kaiju_kaiju2krona
description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona.
keywords:
- taxonomy
- visualisation
- krona chart
- metagenomics
tools:
- "kaiju":
description: Fast and sensitive taxonomic classification for metagenomics
homepage: https://kaiju.binf.ku.dk/
documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
tool_dev_url: https://github.com/bioinformatics-centre/kaiju
doi: "10.1038/ncomms11257"
licence: ["GNU GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- tsv:
type: file
description: Kaiju tab-separated output file
pattern: "*.{tsv,txt}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: Krona text-based input file converted from Kaiju report
pattern: "*.{txt,krona}"
authors:
- "@MillironX"

View file

@ -0,0 +1,40 @@
process KAIJU_KAIJU2TABLE {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
'quay.io/biocontainers/kaiju:1.8.2--h2e03b76_0' }"
input:
tuple val(meta), path(results)
path db
val taxon_rank
output:
tuple val(meta), path('*.txt'), emit: summary
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
dbnodes=`find -L ${db} -name "*nodes.dmp"`
dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
kaiju2table $args \\
-t \$dbnodes \\
-n \$dbname \\
-r ${taxon_rank} \\
-o ${prefix}.txt \\
${results}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
END_VERSIONS
"""
}

View file

@ -0,0 +1,50 @@
name: "kaiju_kaiju2table"
description: write your description here
keywords:
- classify
- metagenomics
tools:
- kaiju:
description: Fast and sensitive taxonomic classification for metagenomics
homepage: https://kaiju.binf.ku.dk/
documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
tool_dev_url: https://github.com/bioinformatics-centre/kaiju
doi: "10.1038/ncomms11257"
licence: ["GNU GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- results:
type: file
description: File containing the kaiju classification results
pattern: "*.{txt}"
- taxon_rank:
type: string
description: |
Taxonomic rank to display in report
pattern: "phylum|class|order|family|genus|species"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- results:
type: file
description: |
Summary table for a given taxonomic rank
pattern: "*.{tsv}"
authors:
- "@sofstam"
- "@talnor"
- "@jfy133"

View file

@ -0,0 +1,58 @@
process KRAKEN2_KRAKEN2 {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' :
'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }"
input:
tuple val(meta), path(reads)
path db
val save_output_fastqs
val save_reads_assignment
output:
tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq
tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq
tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
tuple val(meta), path('*report.txt') , emit: report
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def paired = meta.single_end ? "" : "--paired"
def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
"""
kraken2 \\
--db $db \\
--threads $task.cpus \\
--report ${prefix}.kraken2.report.txt \\
--gzip-compressed \\
$unclassified_command \\
$classified_command \\
$readclassification_command \\
$paired \\
$args \\
$reads
$compress_reads_command
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
}

View file

@ -0,0 +1,75 @@
name: kraken2_kraken2
description: Classifies metagenomic sequence data
keywords:
- classify
- metagenomics
- fastq
- db
tools:
- kraken2:
description: |
Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads
homepage: https://ccb.jhu.edu/software/kraken2/
documentation: https://github.com/DerrickWood/kraken2/wiki/Manual
doi: 10.1186/s13059-019-1891-0
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- db:
type: directory
description: Kraken2 database
- save_output_fastqs:
type: boolean
description: |
If true, optional commands are added to save classified and unclassified reads
as fastq files
- save_reads_assignment:
type: boolean
description: |
If true, an optional command is added to save a file reporting the taxonomic
classification of each input read
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- classified_reads_fastq:
type: file
description: |
Reads classified as belonging to any of the taxa
on the Kraken2 database.
pattern: "*{fastq.gz}"
- unclassified_reads_fastq:
type: file
description: |
Reads not classified to any of the taxa
on the Kraken2 database.
pattern: "*{fastq.gz}"
- classified_reads_assignment:
type: file
description: |
Kraken2 output file indicating the taxonomic assignment of
each input read
- report:
type: file
description: |
Kraken2 report containing stats about classified
and not classifed reads.
pattern: "*.{report.txt}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@joseespinosa"
- "@drpatelh"

View file

@ -0,0 +1,36 @@
def VERSION = '1.2' // Version information not provided by tool on CLI
process KRAKENTOOLS_KREPORT2KRONA {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"
input:
tuple val(meta), path(kreport)
output:
tuple val(meta), path("*.txt"), emit: txt
path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
kreport2krona.py \\
-r ${kreport} \\
-o ${prefix}.txt \\
${args}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kreport2krona.py: ${VERSION}
END_VERSIONS
"""
}

View file

@ -0,0 +1,41 @@
name: krakentools_kreport2krona
description: Takes a Kraken report file and prints out a krona-compatible TEXT file
keywords:
- kraken
- krona
- metagenomics
- visualization
tools:
- krakentools:
description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
homepage: https://github.com/jenniferlu717/KrakenTools
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- kreport:
type: file
description: Kraken report
pattern: "*.{txt,kreport}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- krona:
type: file
description: Krona text-based input file converted from Kraken report
pattern: "*.{txt,krona}"
authors:
- "@MillironX"

View file

@ -0,0 +1,41 @@
process KRONA_KTIMPORTTAXONOMY {
tag "${meta.id}"
label 'process_high'
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::krona=2.8" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/krona:2.8--pl5262hdfd78af_2' :
'quay.io/biocontainers/krona:2.8--pl5262hdfd78af_2' }"
input:
tuple val(meta), path(report)
path taxonomy, stageAs: 'taxonomy.tab'
output:
tuple val(meta), path ('*.html'), emit: html
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2.8' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
TAXONOMY=\$(find -L . -name '*.tab' -exec dirname {} \\;)
echo \$TAXONOMY
ktImportTaxonomy \\
$args \\
-o ${prefix}.html \\
-tax \$TAXONOMY/ \\
$report
cat <<-END_VERSIONS > versions.yml
"${task.process}":
krona: $VERSION
END_VERSIONS
"""
}

View file

@ -0,0 +1,48 @@
name: krona_ktimporttaxonomy
description: KronaTools Import Taxonomy imports taxonomy classifications and produces an interactive Krona plot.
keywords:
- plot
- taxonomy
- interactive
- html
- visualisation
- krona chart
tools:
- krona:
description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
homepage: https://github.com/marbl/Krona/wiki/KronaTools
documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
tool_dev_url:
doi: https://doi.org/10.1186/1471-2105-12-385
licence:
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- database:
type: file
description: |
Path to a Krona taxonomy .tab file normally downloaded and generated by
krona/ktUpdateTaxonomy. Custom taxonomy files can have any name, but
must end in `.tab`.
pattern: "*tab"
- report:
type: file
description: "A tab-delimited file with taxonomy IDs and (optionally) query IDs, magnitudes, and scores. Query IDs are taken from column 1, taxonomy IDs from column 2, and scores from column 3. Lines beginning with # will be ignored."
pattern: "*.{tsv}"
output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- html:
type: file
description: A html file containing an interactive krona plot.
pattern: "*.{html}"
authors:
- "@mjakobs"

View file

@ -0,0 +1,34 @@
process KRONA_KTIMPORTTEXT {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"
input:
tuple val(meta), path(report)
output:
tuple val(meta), path ('*.html'), emit: html
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
ktImportText \\
$args \\
-o ${prefix}.html \\
$report
cat <<-END_VERSIONS > versions.yml
"${task.process}":
krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
END_VERSIONS
"""
}

View file

@ -0,0 +1,47 @@
name: "krona_ktimporttext"
description: Creates a Krona chart from text files listing quantities and lineages.
keywords:
- plot
- taxonomy
- interactive
- html
- visualisation
- krona chart
- metagenomics
tools:
- krona:
description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
homepage: https://github.com/marbl/Krona/wiki/KronaTools
documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
tool_dev_url: https://github.com/marbl/Krona
doi: 10.1186/1471-2105-12-385
licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- report:
type: file
description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored."
pattern: "*.{txt}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- html:
type: file
description: A html file containing an interactive krona plot.
pattern: "*.{html}"
authors:
- "@jianhong"

49
modules/nf-core/modules/malt/run/main.nf generated Normal file
View file

@ -0,0 +1,49 @@
process MALT_RUN {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::malt=0.41" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
'quay.io/biocontainers/malt:0.41--1' }"
input:
tuple val(meta), path(fastqs)
val mode
path index
output:
tuple val(meta), path("*.rma6") , emit: rma6
tuple val(meta), path("*.{tab,text,sam}"), optional:true, emit: alignments
tuple val(meta), path("*.log") , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 6
if (!task.memory) {
log.info '[MALT_RUN] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
malt-run \\
-t $task.cpus \\
-v \\
-o . \\
$args \\
--inFile ${fastqs.join(' ')} \\
-m $mode \\
--index $index/ |&tee ${prefix}-malt-run.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
malt: \$(malt-run --help 2>&1 | grep -o 'version.* ' | cut -f 1 -d ',' | cut -f2 -d ' ')
END_VERSIONS
"""
}

View file

@ -0,0 +1,58 @@
name: malt_run
description: MALT, an acronym for MEGAN alignment tool, is a sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics.
keywords:
- malt
- alignment
- metagenomics
- ancient DNA
- aDNA
- palaeogenomics
- archaeogenomics
- microbiome
tools:
- malt:
description: A tool for mapping metagenomic data
homepage: https://www.wsi.uni-tuebingen.de/lehrstuehle/algorithms-in-bioinformatics/software/malt/
documentation: https://software-ab.informatik.uni-tuebingen.de/download/malt/manual.pdf
tool_dev_url: None
doi: "10.1038/s41559-017-0446-6"
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- fastqs:
type: file
description: Input FASTQ files
pattern: "*.{fastq.gz,fq.gz}"
- mode:
type: string
description: Program mode
pattern: "Unknown|BlastN|BlastP|BlastX|Classifier"
- index:
type: directory
description: Index/database directory from malt-build
pattern: "*/"
output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- rma6:
type: file
description: MEGAN6 RMA6 file
pattern: "*.rma6"
- sam:
type: file
description: Alignment files in Tab, Text or MEGAN-compatible SAM format
pattern: "*.{tab,txt,sam}"
- log:
type: file
description: Log of verbose MALT stdout
pattern: "*-malt-run.log"
authors:
- "@jfy133"

View file

@ -0,0 +1,38 @@
process MEGAN_RMA2INFO {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
input:
tuple val(meta), path(rma6)
val(megan_summary)
output:
tuple val(meta), path("*.txt.gz") , emit: txt
tuple val(meta), path("*.megan"), optional: true, emit: megan_summary
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def summary = megan_summary ? "-es ${prefix}.megan" : ""
"""
rma2info \\
-i ${rma6} \\
-o ${prefix}.txt.gz \\
${summary} \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
END_VERSIONS
"""
}

View file

@ -0,0 +1,51 @@
name: "megan_rma2info"
description: Analyses an RMA file and exports information in text format
keywords:
- megan
- rma6
- classification
- conversion
tools:
- "megan":
description: "A tool for studying the taxonomic content of a set of DNA reads"
homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
tool_dev_url: "https://github.com/husonlab/megan-ce"
doi: "10.1371/journal.pcbi.1004957"
licence: "['GPL >=3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- rma6:
type: file
description: RMA6 file from MEGAN or MALT
pattern: "*.rma6"
- megan_summary:
type: boolean
description: Specify whether to generate an MEGAN summary file
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: Compressed text file
pattern: "*.txt.gz"
- megan_summary:
type: file
description: Optionally generated MEGAN summary file
pattern: "*.megan"
authors:
- "@jfy133"

View file

@ -0,0 +1,45 @@
process METAPHLAN3 {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' :
'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }"
input:
tuple val(meta), path(input)
path metaphlan_db
output:
tuple val(meta), path("*_profile.txt") , emit: profile
tuple val(meta), path("*.biom") , emit: biom
tuple val(meta), path('*.bowtie2out.txt'), optional:true, emit: bt2out
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_type = ("$input".endsWith(".fastq.gz") || "$input".endsWith(".fq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
"""
metaphlan \\
--nproc $task.cpus \\
$input_type \\
$input_data \\
$args \\
$bowtie2_out \\
--bowtie2db ${metaphlan_db} \\
--biom ${prefix}.biom \\
--output_file ${prefix}_profile.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')
END_VERSIONS
"""
}

View file

@ -0,0 +1,52 @@
name: metaphlan3
description: MetaPhlAn is a tool for profiling the composition of microbial communities from metagenomic shotgun sequencing data.
keywords:
- metagenomics
- classification
- fastq
- bam
- fasta
tools:
- metaphlan3:
description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance
homepage: https://huttenhower.sph.harvard.edu/metaphlan/
documentation: https://github.com/biobakery/MetaPhlAn
doi: "10.7554/eLife.65088"
licence: ["MIT License"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out)
pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- profile:
type: file
description: Tab-separated output file of the predicted taxon relative abundances
pattern: "*.{txt}"
- biom:
type: file
description: General-use format for representing biological sample by observation contingency tables
pattern: "*.{biom}"
- bowtie2out:
type: file
description: Intermediate Bowtie2 output produced from mapping the metagenome against the MetaPHlAn marker database ( not compatible with `bowtie2out` files generated with MetaPhlAn versions below 3 )
pattern: "*.{bowtie2out.txt}"
authors:
- "@MGordon09"

View file

@ -0,0 +1,48 @@
process MINIMAP2_ALIGN {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? 'bioconda::minimap2=2.21 bioconda::samtools=1.12' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' :
'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }"
input:
tuple val(meta), path(reads)
path reference
val bam_format
val cigar_paf_format
val cigar_bam
output:
tuple val(meta), path("*.paf"), optional: true, emit: paf
tuple val(meta), path("*.bam"), optional: true, emit: bam
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_reads = meta.single_end ? "$reads" : "${reads[0]} ${reads[1]}"
def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
"""
minimap2 \\
$args \\
-t $task.cpus \\
$reference \\
$input_reads \\
$cigar_paf \\
$set_cigar_bam \\
$bam_output
cat <<-END_VERSIONS > versions.yml
"${task.process}":
minimap2: \$(minimap2 --version 2>&1)
END_VERSIONS
"""
}

View file

@ -0,0 +1,65 @@
name: minimap2_align
description: A versatile pairwise aligner for genomic and spliced nucleotide sequences
keywords:
- align
- fasta
- fastq
- genome
- paf
- reference
tools:
- minimap2:
description: |
A versatile pairwise aligner for genomic and spliced nucleotide sequences.
homepage: https://github.com/lh3/minimap2
documentation: https://github.com/lh3/minimap2#uguide
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FASTA or FASTQ files of size 1 and 2 for single-end
and paired-end data, respectively.
- reference:
type: file
description: |
Reference database in FASTA format.
- bam_format:
type: boolean
description: Specify that output should be in BAM format
- cigar_paf_format:
type: boolean
description: Specify that output CIGAR should be in PAF format
- cigar_bam:
type: boolean
description: |
Write CIGAR with >65535 ops at the CG tag. This is recommended when
doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations)
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- paf:
type: file
description: Alignment in PAF format
pattern: "*.paf"
- bam:
type: file
description: Alignment in BAM format
pattern: "*.bam"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@heuermh"
- "@sofstam"
- "@sateeshperi"
- "@jfy133"

View file

@ -0,0 +1,33 @@
process MINIMAP2_INDEX {
label 'process_medium'
conda (params.enable_conda ? 'bioconda::minimap2=2.21' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/minimap2:2.21--h5bf99c6_0' :
'quay.io/biocontainers/minimap2:2.21--h5bf99c6_0' }"
input:
path fasta
output:
path "*.mmi" , emit: index
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
"""
minimap2 \\
-t $task.cpus \\
-d ${fasta.baseName}.mmi \\
$args \\
$fasta
cat <<-END_VERSIONS > versions.yml
"${task.process}":
minimap2: \$(minimap2 --version 2>&1)
END_VERSIONS
"""
}

View file

@ -0,0 +1,30 @@
name: minimap2_index
description: Provides fasta index required by minimap2 alignment.
keywords:
- index
- fasta
- reference
tools:
- minimap2:
description: |
A versatile pairwise aligner for genomic and spliced nucleotide sequences.
homepage: https://github.com/lh3/minimap2
documentation: https://github.com/lh3/minimap2#uguide
licence: ["MIT"]
input:
- fasta:
type: file
description: |
Reference database in FASTA format.
output:
- mmi:
type: file
description: Minimap2 fasta index.
pattern: "*.mmi"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@yuukiiwa"
- "@drpatelh"

View file

@ -0,0 +1,47 @@
VERSION = '3.0.1'
process MOTUS_MERGE {
label 'process_low'
conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
input:
path input
path db // to stop docker saying it can't find it... would have to have the module in upstream steps anyway
path profile_version_yml, stageAs: 'profile_version.yml'
val biom_format
output:
path("*.txt") , optional: true, emit: txt
path("*.biom"), optional: true, emit: biom
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = 'motus_merged'
def cmd_input = input.size() > 1 ? "-i ${input.join(',')}" : input.isDirectory() ? "-d ${input}" : "-i ${input}"
def output = biom_format ? "-B -o ${prefix}.biom" : "-o ${prefix}.txt"
"""
motus \\
merge \\
-db $db \\
${cmd_input} \\
$args \\
${output}
## Take version from the mOTUs/profile module output, as cannot reconstruct
## version without having database staged in this directory.
VERSION=\$(cat ${profile_version_yml} | grep '/*motus:.*' | sed 's/.*otus: //g')
cat <<-END_VERSIONS > versions.yml
"${task.process}":
motus: \$VERSION
END_VERSIONS
"""
}

View file

@ -0,0 +1,57 @@
name: "motus_merge"
description: Taxonomic meta-omics profiling using universal marker genes
keywords:
- classify
- metagenomics
- fastq
- taxonomic profiling
- merging
- merge
- otu table
tools:
- "motus":
description: "Marker gene-based OTU (mOTU) profiling"
homepage: "https://motu-tool.org/"
documentation: "https://github.com/motu-tool/mOTUs/wiki"
tool_dev_url: "https://github.com/motu-tool/mOTUs"
doi: "10.1038/s41467-019-08844-4"
licence: "['GPL v3']"
input:
- input:
type: file
description: |
List of output files (more than one) from motus profile,
or a single directory containing motus output files.
- db:
type: directory
description: |
mOTUs database downloaded by `motus downloadDB`
pattern: "db_mOTU/"
- profile_version_yml:
type: file
description: |
A single versions.yml file output from motus/profile. motus/merge cannot reconstruct
this itself without having the motus database present and configured with the tool
so here we take it from what is already reported by the upstream module.
pattern: "versions.yml"
- biom_format:
type: boolean
description: Whether to save output OTU table in biom format
output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: OTU table in txt format, if BIOM format not requested
pattern: "*.txt"
- biom:
type: file
description: OTU table in biom format, if BIOM format requested
pattern: "*.biom"
authors:
- "@jfy133"

View file

@ -0,0 +1,56 @@
process MOTUS_PROFILE {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
input:
tuple val(meta), path(reads)
path db
output:
tuple val(meta), path("*.out"), emit: out
tuple val(meta), path("*.bam"), optional: true, emit: bam
tuple val(meta), path("*.mgc"), optional: true, emit: mgc
tuple val(meta), path("*.log") , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def inputs = reads[0].getExtension() == 'bam' ?
"-i ${reads}" :
reads[0].getExtension() == 'mgc' ? "-m $reads" :
meta.single_end ?
"-s $reads" : "-f ${reads[0]} -r ${reads[1]}"
def refdb = db ? "-db ${db}" : ""
"""
motus profile \\
$args \\
$inputs \\
$refdb \\
-t $task.cpus \\
-n $prefix \\
-o ${prefix}.out \\
2> ${prefix}.log
## mOTUs version number is not available from command line.
## mOTUs save the version number in index database folder.
## mOTUs will check the database version is same version as exec version.
if [ "$db" == "" ]; then
VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//')
else
VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g')
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
motus: \$VERSION
END_VERSIONS
"""
}

View file

@ -0,0 +1,65 @@
name: "motus_profile"
description: Taxonomic meta-omics profiling using universal marker genes
keywords:
- classify
- metagenomics
- fastq
- taxonomic profiling
tools:
- "motus":
description: "Marker gene-based OTU (mOTU) profiling"
homepage: "https://motu-tool.org/"
documentation: "https://github.com/motu-tool/mOTUs/wiki"
tool_dev_url: "https://github.com/motu-tool/mOTUs"
doi: "10.1038/s41467-019-08844-4"
licence: "['GPL v3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
respectively.
Or the intermediate bam file mapped by bwa to the mOTUs database or
the output bam file from motus profile.
Or the intermediate mgc read counts table.
pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,.bam,.mgc}"
- db:
type: directory
description: |
mOTUs database downloaded by `motus downloadDB`
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- out:
type: file
description: Results with taxonomic classification of each read
pattern: "*.out"
- bam:
type: file
description: Optional intermediate sorted BAM file from BWA
pattern: "*.{bam}"
- mgc:
type: file
description: Optional intermediate mgc read count table file saved with `-M`.
pattern: "*.{mgc}"
- log:
type: file
description: Standard error logging file containing summary statistics
pattern: "*.log"
authors:
- "@jianhong"

37
modules/nf-core/modules/porechop/main.nf generated Normal file
View file

@ -0,0 +1,37 @@
process PORECHOP {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::porechop=0.2.4" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' :
'quay.io/biocontainers/porechop:0.2.4--py39h7cff6ad_2' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path("*.fastq.gz"), emit: reads
tuple val(meta), path("*.log") , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
porechop \\
-i $reads \\
-t $task.cpus \\
$args \\
-o ${prefix}.fastq.gz \\
> ${prefix}.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
porechop: \$( porechop --version )
END_VERSIONS
"""
}

View file

@ -0,0 +1,55 @@
name: porechop
description: Adapter removal and demultiplexing of Oxford Nanopore reads
keywords:
- adapter
- nanopore
- demultiplexing
tools:
- porechop:
description: Adapter removal and demultiplexing of Oxford Nanopore reads
homepage: "https://github.com/rrwick/Porechop"
documentation: "https://github.com/rrwick/Porechop"
tool_dev_url: "https://github.com/rrwick/Porechop"
doi: "10.1099/mgen.0.000132"
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: fastq/fastq.gz file
pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Demultiplexed and/or adapter-trimmed fastq.gz file
pattern: "*.{fastq.gz}"
- log:
type: file
description: Log file containing stdout information
pattern: "*.log"
authors:
- "@ggabernet"
- "@jasmezz"
- "@d4straub"
- "@LaurenceKuhl"
- "@SusiJo"
- "@jonasscheid"
- "@jonoave"
- "@GokceOGUZ"
- "@jfy133"

View file

@ -0,0 +1,61 @@
process PRINSEQPLUSPLUS {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1':
'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path("*_good_out*.fastq.gz") , emit: good_reads
tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads
tuple val(meta), path("*_bad_out*.fastq.gz") , optional: true, emit: bad_reads
tuple val(meta), path("*.log") , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
"""
prinseq++ \\
-threads $task.cpus \\
-fastq ${reads} \\
-out_name ${prefix} \\
-out_gz \\
-VERBOSE 1 \\
$args \\
| tee ${prefix}.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
END_VERSIONS
"""
} else {
"""
prinseq++ \\
-threads $task.cpus \\
-fastq ${reads[0]} \\
-fastq2 ${reads[1]} \\
-out_name ${prefix} \\
-out_gz \\
-VERBOSE 1 \\
$args \\
| tee ${prefix}.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
END_VERSIONS
"""
}
}

View file

@ -0,0 +1,60 @@
name: "prinseqplusplus"
description: PRINSEQ++ is a C++ implementation of the prinseq-lite.pl program. It can be used to filter, reformat or trim genomic and metagenomic sequence data
keywords:
- fastq
- fasta
- filter
- trim
tools:
- "prinseqplusplus":
description: "PRINSEQ++ - Multi-threaded C++ sequence cleaning"
homepage: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
documentation: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
tool_dev_url: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
doi: "10.7287/peerj.preprints.27553v1"
licence: "['GPL v2']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end
data, respectively.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- good_reads:
type: file
description: Reads passing filter(s) in gzipped FASTQ format
pattern: "*_good_out_{R1,R2}.fastq.gz"
- single_reads:
type: file
description: |
Single reads without the pair passing filter(s) in gzipped FASTQ format
pattern: "*_single_out_{R1,R2}.fastq.gz"
- bad_reads:
type: file
description: |
Reads without not passing filter(s) in gzipped FASTQ format
pattern: "*_bad_out_{R1,R2}.fastq.gz"
- log:
type: file
description: |
Verbose level 2 STDOUT information in a log file
pattern: "*.log"
authors:
- "@jfy133"

View file

@ -0,0 +1,56 @@
process SAMTOOLS_BAM2FQ {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(inputbam)
val split
output:
tuple val(meta), path("*.fq.gz"), emit: reads
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
if (split){
"""
samtools \\
bam2fq \\
$args \\
-@ $task.cpus \\
-1 ${prefix}_1.fq.gz \\
-2 ${prefix}_2.fq.gz \\
-0 ${prefix}_other.fq.gz \\
-s ${prefix}_singleton.fq.gz \\
$inputbam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
} else {
"""
samtools \\
bam2fq \\
$args \\
-@ $task.cpus \\
$inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}
}

View file

@ -0,0 +1,55 @@
name: samtools_bam2fq
description: |
The module uses bam2fq method from samtools to
convert a SAM, BAM or CRAM file to FASTQ format
keywords:
- bam2fq
- samtools
- fastq
tools:
- samtools:
description: Tools for dealing with SAM, BAM and CRAM files
homepage: None
documentation: http://www.htslib.org/doc/1.1/samtools.html
tool_dev_url: None
doi: ""
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- inputbam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- split:
type: boolean
description: |
TRUE/FALSE value to indicate if reads should be separated into
/1, /2 and if present other, or singleton.
Note: choosing TRUE will generate 4 different files.
Choosing FALSE will produce a single file, which will be interleaved in case
the input contains paired reads.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: |
FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
or a single interleaved .fq.gz file if the user chooses not to split the reads.
pattern: "*.fq.gz"
authors:
- "@lescai"

View file

@ -0,0 +1,56 @@
process SAMTOOLS_VIEW {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(input), path(index)
path fasta
output:
tuple val(meta), path("*.bam") , emit: bam , optional: true
tuple val(meta), path("*.cram"), emit: cram, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def reference = fasta ? "--reference ${fasta} -C" : ""
def file_type = input.getExtension()
if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
samtools \\
view \\
--threads ${task.cpus-1} \\
${reference} \\
$args \\
$input \\
$args2 \\
> ${prefix}.${file_type}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bam
touch ${prefix}.cram
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

View file

@ -0,0 +1,57 @@
name: samtools_view
description: filter/convert SAM/BAM/CRAM file
keywords:
- view
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: hhttp://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- index:
type: optional file
description: BAM.BAI/CRAM.CRAI file
pattern: "*.{.bai,.crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: filtered/converted BAM/SAM file
pattern: "*.{bam,sam}"
- cram:
type: file
description: filtered/converted CRAM file
pattern: "*.cram"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@joseespinosa"
- "@FriederikeHanssen"

36
modules/nf-core/modules/untar/main.nf generated Normal file
View file

@ -0,0 +1,36 @@
process UNTAR {
tag "$archive"
label 'process_low'
conda (params.enable_conda ? "conda-forge::tar=1.32" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
'biocontainers/biocontainers:v1.2.0_cv1' }"
input:
tuple val(meta), path(archive)
output:
tuple val(meta), path("$untar"), emit: untar
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
untar = archive.toString() - '.tar.gz'
"""
tar \\
-xzvf \\
$args \\
$archive \\
$args2 \\
cat <<-END_VERSIONS > versions.yml
"${task.process}":
untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
END_VERSIONS
"""
}

38
modules/nf-core/modules/untar/meta.yml generated Normal file
View file

@ -0,0 +1,38 @@
name: untar
description: Extract files.
keywords:
- untar
- uncompress
tools:
- untar:
description: |
Extract tar.gz files.
documentation: https://www.gnu.org/software/tar/manual/
licence: ["GPL-3.0-or-later"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- archive:
type: file
description: File to be untar
pattern: "*.{tar}.{gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- untar:
type: file
description:
pattern: "*.*"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@joseespinosa"
- "@drpatelh"

View file

@ -34,7 +34,7 @@ params {
help = false
validate_params = true
show_hidden_params = false
schema_ignore_params = 'genomes'
schema_ignore_params = 'genomes,fasta'
enable_conda = false
@ -53,6 +53,91 @@ params {
max_cpus = 16
max_time = '240.h'
// Databases
databases = null
// FASTQ preprocessing
perform_shortread_qc = false
shortread_qc_tool = 'fastp'
shortread_qc_skipadaptertrim = false
shortread_qc_mergepairs = false
shortread_qc_excludeunmerged = false
shortread_qc_adapter1 = null
shortread_qc_adapter2 = null
shortread_qc_minlength = 15
perform_longread_qc = false
longread_qc_run_clip = false
longread_qc_run_filter = false
longread_qc_minlength = 1000
longread_qc_keep_percent = 90
longread_qc_target_bases = 500000000
save_preprocessed_reads = false
// Complexity filtering
perform_shortread_complexityfilter = false
shortread_complexityfilter_tool = 'bbduk'
shortread_complexityfilter_entropy = 0.3
shortread_complexityfilter_bbduk_windowsize = 50
shortread_complexityfilter_bbduk_mask = false
shortread_complexityfilter_prinseqplusplus_mode = 'entropy'
shortread_complexityfilter_prinseqplusplus_dustscore = 0.5
shortread_complexityfilter_fastp_threshold = 30
save_complexityfiltered_reads = false
// run merging
perform_runmerging = false
save_runmerged_reads = false
// Host Removal
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
hostremoval_reference = null
shortread_hostremoval_index = null
longread_hostremoval_index = null
save_hostremoval_index = false
save_hostremoval_mapped = false
save_hostremoval_unmapped = false
// MALT
run_malt = false
malt_mode = 'BlastN'
malt_generate_megansummary = false
malt_save_reads = false
// kraken2
run_kraken2 = false
kraken2_save_reads = false
kraken2_save_readclassification = false
// centrifuge
run_centrifuge = false
centrifuge_save_reads = false
// metaphlan3
run_metaphlan3 = false
// kaiju
run_kaiju = false
kaiju_taxon_name = 'species'
// diamond
run_diamond = false
diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently
diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated!
// mOTUs
run_motus = false
// krona
run_krona = false
krona_taxonomy_directory = null
// profile standardisation
run_profile_standardisation = false
generate_biom_output = false
}
// Load base.config by default for all pipelines
@ -67,11 +152,11 @@ try {
// Load nf-core/taxprofiler custom profiles from different institutions.
// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs!
// try {
// includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config"
// } catch (Exception e) {
// System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config")
// }
try {
includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config"
} catch (Exception e) {
System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config")
}
@ -136,12 +221,17 @@ profiles {
executor.cpus = 16
executor.memory = 60.GB
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
test_noprofiling { includeConfig 'conf/test_noprofiling.config' }
test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' }
test_nothing { includeConfig 'conf/test_nothing.config' }
test_motus { includeConfig 'conf/test_motus.config' }
}
// Load igenomes.config if required
if (!params.igenomes_ignore) {
includeConfig 'conf/igenomes.config'
} else {
@ -154,7 +244,7 @@ if (!params.igenomes_ignore) {
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
env {
PYTHONNOUSERSITE = 1
PYTHONNOUSERSITE = '1'
R_PROFILE_USER = "/.Rprofile"
R_ENVIRON_USER = "/.Renviron"
JULIA_DEPOT_PATH = "/usr/local/share/julia"

View file

@ -52,16 +52,8 @@
"type": "string",
"description": "Name of iGenomes reference.",
"fa_icon": "fas fa-book",
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
},
"fasta": {
"type": "string",
"format": "file-path",
"mimetype": "text/plain",
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
"description": "Path to FASTA genome file.",
"help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
"fa_icon": "far fa-file-code"
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
"hidden": true
},
"igenomes_base": {
"type": "string",
@ -265,5 +257,201 @@
{
"$ref": "#/definitions/generic_options"
}
]
],
"properties": {
"databases": {
"type": "string",
"mimetype": "text/csv",
"format": "file-path",
"default": "None"
},
"shortread_qc_excludeunmerged": {
"type": "boolean"
},
"run_malt": {
"type": "boolean"
},
"malt_mode": {
"type": "string",
"default": "BlastN"
},
"run_kraken2": {
"type": "boolean"
},
"run_centrifuge": {
"type": "boolean"
},
"run_metaphlan3": {
"type": "boolean",
"description": "Enable MetaPhlAn for taxonomic profiling"
},
"shortread_qc_tool": {
"type": "string",
"default": "fastp",
"enum": ["fastp", "adapterremoval"]
},
"shortread_qc_skipadaptertrim": {
"type": "boolean"
},
"shortread_qc_mergepairs": {
"type": "boolean"
},
"shortread_qc_adapter1": {
"type": "string",
"default": "None"
},
"shortread_qc_adapter2": {
"type": "string",
"default": "None"
},
"shortread_qc_minlength": {
"type": "integer",
"default": 15
},
"save_preprocessed_reads": {
"type": "boolean"
},
"shortread_complexityfilter_tool": {
"type": "string",
"default": "bbduk",
"enum": ["bbduk", "prinseqplusplus", "fastp"]
},
"shortread_complexityfilter_bbduk_windowsize": {
"type": "integer",
"default": 50
},
"shortread_complexityfilter_bbduk_mask": {
"type": "boolean"
},
"shortread_complexityfilter_entropy": {
"type": "number",
"default": 0.3
},
"shortread_complexityfilter_prinseqplusplus_mode": {
"type": "string",
"default": "entropy",
"enum": ["entropy", "dust"]
},
"shortread_complexityfilter_prinseqplusplus_dustscore": {
"type": "number",
"default": 0.5
},
"save_complexityfiltered_reads": {
"type": "boolean"
},
"save_runmerged_reads": {
"type": "boolean"
},
"perform_shortread_qc": {
"type": "boolean"
},
"perform_longread_qc": {
"type": "boolean"
},
"perform_shortread_complexityfilter": {
"type": "boolean"
},
"perform_runmerging": {
"type": "boolean"
},
"perform_shortread_hostremoval": {
"type": "boolean"
},
"perform_longread_hostremoval": {
"type": "boolean"
},
"hostremoval_reference": {
"type": "string",
"default": "None"
},
"shortread_hostremoval_index": {
"type": "string",
"default": "None"
},
"save_hostremoval_index": {
"type": "boolean"
},
"save_hostremoval_mapped": {
"type": "boolean"
},
"save_hostremoval_unmapped": {
"type": "boolean"
},
"run_kaiju": {
"type": "boolean"
},
"malt_generate_megansummary": {
"type": "boolean"
},
"kaiju_taxon_name": {
"type": "string",
"default": "species",
"enum": ["phylum", "class", "order", "family", "genus", "species"]
},
"run_diamond": {
"type": "boolean"
},
"diamond_output_format": {
"type": "string",
"default": "tsv",
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"]
},
"longread_hostremoval_index": {
"type": "string",
"default": "None"
},
"shortread_complexityfilter_fastp_threshold": {
"type": "integer",
"default": 30
},
"longread_qc_run_clip": {
"type": "boolean"
},
"longread_qc_run_filter": {
"type": "boolean"
},
"longread_qc_minlength": {
"type": "integer",
"default": 1000
},
"longread_qc_keep_percent": {
"type": "integer",
"default": 90
},
"longread_qc_target_bases": {
"type": "integer",
"default": 500000000
},
"run_motus": {
"type": "boolean"
},
"malt_save_reads": {
"type": "boolean"
},
"kraken2_save_reads": {
"type": "boolean"
},
"kraken2_save_readclassification": {
"type": "boolean"
},
"centrifuge_save_reads": {
"type": "boolean"
},
"diamond_save_reads": {
"type": "boolean"
},
"run_krona": {
"type": "boolean"
},
"krona_taxonomy_directory": {
"type": "string",
"default": "None"
},
"run_profile_standardisation": {
"type": "boolean"
},
"generate_biom_output": {
"type": "boolean"
}
}
}

View file

@ -0,0 +1,53 @@
//
// Check input samplesheet and get read channels
//
include { DATABASE_CHECK } from '../../modules/local/database_check'
include { UNTAR } from '../../modules/nf-core/modules/untar/main'
workflow DB_CHECK {
take:
dbsheet // file: /path/to/dbsheet.csv
main:
// TODO: make database sheet check
// Checks:
// 1) no duplicates,
// 2) args do not have quotes, e.g. just `,,` and NOT `,"",`
parsed_samplesheet = DATABASE_CHECK ( dbsheet )
.csv
.splitCsv ( header:true, sep:',' )
.map { create_db_channels(it) }
ch_dbs_for_untar = parsed_samplesheet
.branch {
untar: it[1].toString().endsWith(".tar.gz")
skip: true
}
// TODO Filter to only run UNTAR on DBs of tools actually using?
// TODO make optional whether to save
UNTAR ( ch_dbs_for_untar.untar )
ch_final_dbs = ch_dbs_for_untar.skip.mix( UNTAR.out.untar )
emit:
dbs = ch_final_dbs // channel: [ val(meta), [ db ] ]
versions = DATABASE_CHECK.out.versions.mix(UNTAR.out.versions.first()) // channel: [ versions.yml ]
}
def create_db_channels(LinkedHashMap row) {
def meta = [:]
meta.tool = row.tool
meta.db_name = row.db_name
meta.db_params = row.db_params
def array = []
if (!file(row.db_path, type: 'dir').exists()) {
exit 1, "ERROR: Please check input samplesheet -> database could not be found!\n${row.db_path}"
}
array = [ meta, file(row.db_path) ]
return array
}

View file

@ -9,14 +9,31 @@ workflow INPUT_CHECK {
samplesheet // file: /path/to/samplesheet.csv
main:
SAMPLESHEET_CHECK ( samplesheet )
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
.csv
.splitCsv ( header:true, sep:',' )
.branch {
fasta: it['fasta'] != ''
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
fastq: true
}
parsed_samplesheet.fastq
.map { create_fastq_channel(it) }
.set { reads }
.set { fastq }
parsed_samplesheet.nanopore
.map { create_fastq_channel(it) }
.set { nanopore }
parsed_samplesheet.fasta
.map { create_fasta_channel(it) }
.set { fasta }
emit:
reads // channel: [ val(meta), [ reads ] ]
fastq = fastq ?: [] // channel: [ val(meta), [ reads ] ]
nanopore = nanopore ?: [] // channel: [ val(meta), [ reads ] ]
fasta = fasta ?: [] // channel: [ val(meta), fasta ]
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
}
@ -24,8 +41,11 @@ workflow INPUT_CHECK {
def create_fastq_channel(LinkedHashMap row) {
// create meta map
def meta = [:]
meta.id = row.sample
meta.single_end = row.single_end.toBoolean()
meta.id = row.sample
meta.run_accession = row.run_accession
meta.instrument_platform = row.instrument_platform
meta.single_end = row.single_end.toBoolean()
meta.is_fasta = false
// add path(s) of the fastq file(s) to the meta map
def fastq_meta = []
@ -35,10 +55,34 @@ def create_fastq_channel(LinkedHashMap row) {
if (meta.single_end) {
fastq_meta = [ meta, [ file(row.fastq_1) ] ]
} else {
if (!file(row.fastq_2).exists()) {
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
if (meta.instrument_platform == 'OXFORD_NANOPORE') {
if (row.fastq_2 != '') {
exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}"
}
fastq_meta = [ meta, [ file(row.fastq_1) ] ]
} else {
if (!file(row.fastq_2).exists()) {
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
}
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
}
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
}
return fastq_meta
}// Function to get list of [ meta, fasta ]
def create_fasta_channel(LinkedHashMap row) {
def meta = [:]
meta.id = row.sample
meta.run_accession = row.run_accession
meta.instrument_platform = row.instrument_platform
meta.single_end = true
meta.is_fasta = true
def array = []
if (!file(row.fasta).exists()) {
exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}"
}
array = [ meta, [ file(row.fasta) ] ]
return array
}

View file

@ -0,0 +1,47 @@
//
// Remove host reads via alignment and export off-target reads
//
include { MINIMAP2_INDEX } from '../../modules/nf-core/modules/minimap2/index/main'
include { MINIMAP2_ALIGN } from '../../modules/nf-core/modules/minimap2/align/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/modules/samtools/view/main'
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main'
workflow LONGREAD_HOSTREMOVAL {
take:
reads // [ [ meta ], [ reads ] ]
reference // /path/to/fasta
index // /path/to/index
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( !params.longread_hostremoval_index ) {
ch_minimap2_index = MINIMAP2_INDEX ( reference ).index
ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions )
} else {
ch_minimap2_index = index
}
MINIMAP2_ALIGN ( reads, ch_minimap2_index, true, false, false )
ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions.first() )
ch_minimap2_mapped = MINIMAP2_ALIGN.out.bam
.map {
meta, reads ->
[ meta, reads, [] ]
}
SAMTOOLS_VIEW ( ch_minimap2_mapped , [] )
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )
SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false )
ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() )
emit:
reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
}

View file

@ -0,0 +1,63 @@
//
// Process long raw reads with porechop
//
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
include { FILTLONG } from '../../modules/nf-core/modules/filtlong/main'
workflow LONGREAD_PREPROCESSING {
take:
reads
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( params.longread_qc_run_clip && !params.longread_qc_run_filter ) {
PORECHOP ( reads )
ch_processed_reads = PORECHOP.out.reads
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
}
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP.out.log )
} else if ( !params.longread_qc_run_clip && params.longread_qc_run_filter ) {
ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} )
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
} else {
PORECHOP ( reads )
ch_clipped_reads = PORECHOP.out.reads
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
}
ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP.out.log )
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
}
FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -0,0 +1,241 @@
//
// Run profiling
//
include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main'
include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main'
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/modules/centrifuge/kreport/main'
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main'
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main'
include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main'
include { MOTUS_PROFILE } from '../../modules/nf-core/modules/motus/profile/main'
workflow PROFILING {
take:
reads // [ [ meta ], [ reads ] ]
databases // [ [ meta ], path ]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_raw_classifications = Channel.empty()
ch_raw_profiles = Channel.empty()
/*
COMBINE READS WITH POSSIBLE DATABASES
*/
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
ch_input_for_profiling = reads
.map {
meta, reads ->
def meta_new = meta.clone()
pairtype = meta_new['single_end'] ? '_se' : '_pe'
meta_new['id'] = meta_new['id'] + pairtype
[meta_new, reads]
}
.combine(databases)
.branch {
malt: it[2]['tool'] == 'malt'
kraken2: it[2]['tool'] == 'kraken2'
metaphlan3: it[2]['tool'] == 'metaphlan3'
centrifuge: it[2]['tool'] == 'centrifuge'
kaiju: it[2]['tool'] == 'kaiju'
diamond: it[2]['tool'] == 'diamond'
motus: it[2]['tool'] == 'motus'
unknown: true
}
/*
PREPARE PROFILER INPUT CHANNELS & RUN PROFILING
*/
// Each tool as a slightly different input structure and generally separate
// input channels for reads vs databases. We restructure the channel tuple
// for each tool and make liberal use of multiMap to keep reads/databases
// channel element order in sync with each other
if ( params.run_malt ) {
// MALT: We groupTuple to have all samples in one channel for MALT as database
// loading takes a long time, so we only want to run it once per database
ch_input_for_malt = ch_input_for_profiling.malt
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
.map {
meta, reads, db_meta, db ->
// Reset entire input meta for MALT to just database name,
// as we don't run run on a per-sample basis due to huge datbaases
// so all samples are in one run and so sample-specific metadata
// unnecessary. Set as database name to prevent `null` job ID and prefix.
def temp_meta = [ id: meta['db_name'] ]
// Extend database parameters to specify whether to save alignments or not
def new_db_meta = db_meta.clone()
def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
new_db_meta['db_params'] = db_meta['db_params'] + sam_format
// Combine reduced sample metadata with updated database parameters metadata,
// make sure id is db_name for publishing purposes.
def new_meta = temp_meta + new_db_meta
new_meta['id'] = new_meta['db_name']
[ new_meta, reads, db ]
}
.groupTuple(by: [0,2])
.multiMap {
it ->
reads: [ it[0], it[1].flatten() ]
db: it[2]
}
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
ch_maltrun_for_megan = MALT_RUN.out.rma6
.transpose()
.map{
meta, rma ->
// re-extract meta from file names, use filename without rma to
// ensure we keep paired-end information in downstream filenames
// when no pair-merging
def meta_new = meta.clone()
meta_new['db_name'] = meta.id
meta_new['id'] = rma.baseName
[ meta_new, rma ]
}
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
}
if ( params.run_kraken2 ) {
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
db: it[3]
}
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )
}
if ( params.run_centrifuge ) {
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] Centrifuge currently does not accept FASTA files as input. Skipping Centrifuge for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
db: it[3]
}
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.report, ch_input_for_centrifuge.db)
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
ch_multiqc_files = ch_multiqc_files.mix( CENTRIFUGE_KREPORT.out.kreport )
}
if ( params.run_metaphlan3 ) {
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
}
if ( params.run_kaiju ) {
ch_input_for_kaiju = ch_input_for_profiling.kaiju
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
}
if ( params.run_diamond ) {
ch_input_for_diamond = ch_input_for_profiling.diamond
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
// diamond only accepts single output file specification, therefore
// this will replace output file!
ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, ch_diamond_reads_format , [] )
ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv )
ch_multiqc_files = ch_multiqc_files.mix( DIAMOND_BLASTX.out.log )
}
if ( params.run_motus ) {
ch_input_for_motus = ch_input_for_profiling.motus
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] mOTUs currently does not accept FASTA files as input. Skipping mOTUs for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db )
ch_versions = ch_versions.mix( MOTUS_PROFILE.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out )
ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log )
}
emit:
classifications = ch_raw_classifications
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
versions = ch_versions // channel: [ versions.yml ]
motus_version = params.run_motus ? MOTUS_PROFILE.out.versions.first() : Channel.empty()
mqc = ch_multiqc_files
}

View file

@ -0,0 +1,98 @@
//
// Process short raw reads with AdapterRemoval
//
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
workflow SHORTREAD_ADAPTERREMOVAL {
take:
reads // [[meta], [reads]]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_input_for_adapterremoval = reads
.branch{
single: it[0].single_end
paired: !it[0].single_end
}
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
/*
* Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file
* has to be exported in a separate channel and we must manually recombine when necessary.
*/
if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) {
ch_concat_fastq = Channel.empty()
.mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.paired_truncated
)
.map { meta, reads ->
def meta_new = meta.clone()
meta_new.single_end = true
[meta_new, reads]
}
.groupTuple()
// Paired-end reads cause a nested tuple during grouping.
// We want to present a flat list of files to `CAT_FASTQ`.
.map { meta, fastq -> [meta, fastq.flatten()] }
CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) {
ch_concat_fastq = Channel.empty()
.mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
)
.map { meta, reads ->
def meta_new = meta.clone()
meta_new.single_end = true
[meta_new, reads]
}
.groupTuple()
.map { meta, fastq -> [meta, fastq.flatten()] }
CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else {
ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
}
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix(
ADAPTERREMOVAL_PAIRED.out.settings,
ADAPTERREMOVAL_SINGLE.out.settings
)
emit:
reads = ch_adapterremoval_reads_prepped // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -0,0 +1,33 @@
//
// Check input samplesheet and get read channels
//
include { BBMAP_BBDUK } from '../../modules/nf-core/modules/bbmap/bbduk/main'
include { PRINSEQPLUSPLUS } from '../../modules/nf-core/modules/prinseqplusplus/main'
workflow SHORTREAD_COMPLEXITYFILTERING {
take:
reads // [ [ meta ], [ reads ] ]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
// fastp complexity filtering is activated via modules.conf in shortread_preprocessing
if ( params.shortread_complexityfilter_tool == 'bbduk' ) {
ch_filtered_reads = BBMAP_BBDUK ( reads, [] ).reads
ch_versions = ch_versions.mix( BBMAP_BBDUK.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( BBMAP_BBDUK.out.log )
} else if ( params.shortread_complexityfilter_tool == 'prinseqplusplus' ) {
ch_filtered_reads = PRINSEQPLUSPLUS ( reads ).good_reads
ch_versions = ch_versions.mix( PRINSEQPLUSPLUS.out.versions.first() )
} else {
ch_filtered_reads = reads
}
emit:
reads = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -0,0 +1,55 @@
//
// Process short raw reads with FastP
//
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
workflow SHORTREAD_FASTP {
take:
reads // [[meta], [reads]]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_input_for_fastp = reads
.branch{
single: it[0]['single_end'] == true
paired: it[0]['single_end'] == false
}
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
// Last parameter here turns on merging of PE data
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )
if ( params.shortread_qc_mergepairs ) {
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = true
[ meta_new, [ reads ].flatten() ]
}
ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads )
} else {
ch_fastp_reads_prepped = FASTP_PAIRED.out.reads
.mix( FASTP_SINGLE.out.reads )
}
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
ch_processed_reads = ch_fastp_reads_prepped
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json )
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json )
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -0,0 +1,34 @@
//
// Remove host reads via alignment and export off-target reads
//
include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main'
workflow SHORTREAD_HOSTREMOVAL {
take:
reads // [ [ meta ], [ reads ] ]
reference // /path/to/fasta
index // /path/to/index
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( !params.shortread_hostremoval_index ) {
ch_bowtie2_index = BOWTIE2_BUILD ( reference ).index
ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions )
} else {
ch_bowtie2_index = index.first()
}
BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true )
ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log )
emit:
reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -0,0 +1,39 @@
//
// Perform read trimming and merging
//
include { SHORTREAD_FASTP } from './shortread_fastp'
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
workflow SHORTREAD_PREPROCESSING {
take:
reads // [ [ meta ], [ reads ] ]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( params.shortread_qc_tool == "fastp" ) {
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
} else if ( params.shortread_qc_tool == "adapterremoval" ) {
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
} else {
ch_processed_reads = reads
}
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

Some files were not shown because too many files have changed in this diff Show more