1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-13 07:03:10 +00:00

Merge branch 'dev' of github.com:nf-core/taxprofiler into add-filtlong

This commit is contained in:
James Fellows Yates 2022-05-31 13:05:52 +02:00
commit 310ac6b450
31 changed files with 859 additions and 233 deletions

View file

@ -28,6 +28,3 @@ jobs:
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/taxprofiler/results-${{ github.sha }}"
}
profiles: test_full,aws_tower
nextflow_config: |
process.errorStrategy = 'retry'
process.maxRetries = 3

View file

@ -23,6 +23,3 @@ jobs:
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/taxprofiler/results-test-${{ github.sha }}"
}
profiles: test,aws_tower
nextflow_config: |
process.errorStrategy = 'retry'
process.maxRetries = 3

View file

@ -13,7 +13,7 @@ jobs:
- name: Check PRs
if: github.repository == 'nf-core/taxprofiler'
run: |
"{ [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/taxprofiler ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]"
{ [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/taxprofiler ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
# If the above check failed, post a comment on the PR explaining the failure
# NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
@ -42,4 +42,3 @@ jobs:
Thanks again for your contribution!
repo-token: ${{ secrets.GITHUB_TOKEN }}
allow-repeats: false
#

View file

@ -38,7 +38,7 @@ jobs:
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
- "--shortread_complexityfilter_tool bbduk"
- "--shortread_complexityfilter_tool prinseq"
- "--shortread_complexityfilter_tool prinseqplusplus"
- "--perform_runmerging"
- "--perform_runmerging --shortread_qc_mergepairs"
- "--shortread_complexityfilter false --perform_shortread_hostremoval"
@ -71,5 +71,3 @@ jobs:
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
#

55
.github/workflows/fix-linting.yml vendored Normal file
View file

@ -0,0 +1,55 @@
name: Fix linting from a comment
on:
issue_comment:
types: [created]
jobs:
deploy:
# Only run if comment is on a PR with the main repo, and if it contains the magic keywords
if: >
contains(github.event.comment.html_url, '/pull/') &&
contains(github.event.comment.body, '@nf-core-bot fix linting') &&
github.repository == 'nf-core/taxprofiler'
runs-on: ubuntu-latest
steps:
# Use the @nf-core-bot token to check out so we can push later
- uses: actions/checkout@v3
with:
token: ${{ secrets.nf_core_bot_auth_token }}
# Action runs on the issue comment, so we don't get the PR by default
# Use the gh cli to check out the PR
- name: Checkout Pull Request
run: gh pr checkout ${{ github.event.issue.number }}
env:
GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
- uses: actions/setup-node@v2
- name: Install Prettier
run: npm install -g prettier @prettier/plugin-php
# Check that we actually need to fix something
- name: Run 'prettier --check'
id: prettier_status
run: |
if prettier --check ${GITHUB_WORKSPACE}; then
echo "::set-output name=result::pass"
else
echo "::set-output name=result::fail"
fi
- name: Run 'prettier --write'
if: steps.prettier_status.outputs.result == 'fail'
run: prettier --write ${GITHUB_WORKSPACE}
- name: Commit & push changes
if: steps.prettier_status.outputs.result == 'fail'
run: |
git config user.email "core@nf-co.re"
git config user.name "nf-core-bot"
git config push.default upstream
git add .
git status
git commit -m "[automated] Fix linting with Prettier"
git push

View file

@ -48,7 +48,7 @@ jobs:
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
- uses: actions/setup-python@v1
- uses: actions/setup-python@v3
with:
python-version: "3.6"
architecture: "x64"
@ -78,5 +78,3 @@ jobs:
lint_log.txt
lint_results.md
PR_number.txt
#

View file

@ -26,4 +26,3 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
number: ${{ steps.pr_number.outputs.pr_number }}
path: linting-logs/lint_results.md
#

View file

@ -1,2 +1,10 @@
email_template.html
.nextflow*
work/
data/
results/
.DS_Store
testing/
testing*
tests/
*.pyc

View file

@ -2,17 +2,18 @@
[![GitHub Actions CI Status](https://github.com/nf-core/taxprofiler/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/taxprofiler/actions?query=workflow%3A%22nf-core+CI%22)
[![GitHub Actions Linting Status](https://github.com/nf-core/taxprofiler/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/taxprofiler/actions?query=workflow%3A%22nf-core+linting%22)
[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/taxprofiler/results)
[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/taxprofiler/results)
[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX)
[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/)
[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/)
[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/)
[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/taxprofiler)
[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23taxprofiler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/taxprofiler)
[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)
[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23taxprofiler-4A154B?logo=slack)](https://nfcore.slack.com/channels/taxprofiler)
[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core)
[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core)
## Introduction

View file

@ -1,111 +1,53 @@
<html>
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- prettier-ignore -->
<meta name="description" content="nf-core/taxprofiler: Taxonomic profiling of shotgun metagenomic data" />
<title>nf-core/taxprofiler Pipeline Report</title>
</head>
<body>
<div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto">
<img src="cid:nfcorepipelinelogo" />
<meta name="description" content="nf-core/taxprofiler: Taxonomic profiling of shotgun metagenomic data">
<title>nf-core/taxprofiler Pipeline Report</title>
</head>
<body>
<div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto;">
<h1>nf-core/taxprofiler v${version}</h1>
<h2>Run Name: $runName</h2>
<img src="cid:nfcorepipelinelogo">
<% if (!success){ out << """
<div
style="
color: #a94442;
background-color: #f2dede;
border-color: #ebccd1;
padding: 15px;
margin-bottom: 20px;
border: 1px solid transparent;
border-radius: 4px;
"
>
<h4 style="margin-top: 0; color: inherit">nf-core/taxprofiler execution completed unsuccessfully!</h4>
<h1>nf-core/taxprofiler v${version}</h1>
<h2>Run Name: $runName</h2>
<% if (!success){
out << """
<div style="color: #a94442; background-color: #f2dede; border-color: #ebccd1; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
<h4 style="margin-top:0; color: inherit;">nf-core/taxprofiler execution completed unsuccessfully!</h4>
<p>The exit status of the task that caused the workflow execution to fail was: <code>$exitStatus</code>.</p>
<p>The full error message was:</p>
<pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0">${errorReport}</pre>
</div>
""" } else { out << """
<div
style="
color: #3c763d;
background-color: #dff0d8;
border-color: #d6e9c6;
padding: 15px;
margin-bottom: 20px;
border: 1px solid transparent;
border-radius: 4px;
"
>
nf-core/taxprofiler execution completed successfully!
</div>
""" } %>
<p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
<p>The command used to launch the workflow was as follows:</p>
<pre
style="
white-space: pre-wrap;
overflow: visible;
background-color: #ededed;
padding: 15px;
border-radius: 4px;
margin-bottom: 30px;
"
>
$commandLine</pre
>
<h3>Pipeline Configuration:</h3>
<table
style="
width: 100%;
max-width: 100%;
border-spacing: 0;
border-collapse: collapse;
border: 0;
margin-bottom: 30px;
"
>
<tbody style="border-bottom: 1px solid #ddd">
<% out << summary.collect{ k,v -> "
<tr>
<th
style="
text-align: left;
padding: 8px 0;
line-height: 1.42857143;
vertical-align: top;
border-top: 1px solid #ddd;
"
>
$k
</th>
<td
style="
text-align: left;
padding: 8px;
line-height: 1.42857143;
vertical-align: top;
border-top: 1px solid #ddd;
"
>
<pre style="white-space: pre-wrap; overflow: visible">$v</pre>
</td>
</tr>
" }.join("\n") %>
</tbody>
</table>
<p>nf-core/taxprofiler</p>
<p><a href="https://github.com/nf-core/taxprofiler">https://github.com/nf-core/taxprofiler</a></p>
<pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0;">${errorReport}</pre>
</div>
</body>
"""
} else {
out << """
<div style="color: #3c763d; background-color: #dff0d8; border-color: #d6e9c6; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
nf-core/taxprofiler execution completed successfully!
</div>
"""
}
%>
<p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
<p>The command used to launch the workflow was as follows:</p>
<pre style="white-space: pre-wrap; overflow: visible; background-color: #ededed; padding: 15px; border-radius: 4px; margin-bottom:30px;">$commandLine</pre>
<h3>Pipeline Configuration:</h3>
<table style="width:100%; max-width:100%; border-spacing: 0; border-collapse: collapse; border:0; margin-bottom: 30px;">
<tbody style="border-bottom: 1px solid #ddd;">
<% out << summary.collect{ k,v -> "<tr><th style='text-align:left; padding: 8px 0; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'>$k</th><td style='text-align:left; padding: 8px; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'><pre style='white-space: pre-wrap; overflow: visible;'>$v</pre></td></tr>" }.join("\n") %>
</tbody>
</table>
<p>nf-core/taxprofiler</p>
<p><a href="https://github.com/nf-core/taxprofiler">https://github.com/nf-core/taxprofiler</a></p>
</div>
</body>
</html>

View file

@ -54,7 +54,8 @@ process {
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
// filtering options
"--length_required ${params.shortread_qc_minlength}"
"--length_required ${params.shortread_clipmerge_minlength}",
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
@ -74,7 +75,8 @@ process {
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
// filtering options
"--length_required ${params.shortread_qc_minlength}"
"--length_required ${params.shortread_clipmerge_minlength}",
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
@ -180,6 +182,47 @@ process {
]
}
withName: MINIMAP2_INDEX {
ext.args = '-x map-ont'
publishDir = [
path: { "${params.outdir}/minimap2/index" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_index,
pattern: 'minimap2'
]
}
withName: MINIMAP2_ALIGN {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/minimap2/align" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_mapped,
pattern: '*.bam'
]
}
withName: SAMTOOLS_VIEW {
ext.args = '-f 4'
ext.prefix = { "${meta.id}.mapped.sorted" }
publishDir = [
path: { "${params.outdir}/samtools/view" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_unmapped,
pattern: '*.bam'
]
}
withName: SAMTOOLS_BAM2FQ {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/samtools/bam2fq" },
mode: params.publish_dir_mode,
enabled: params.save_hostremoval_unmapped,
pattern: '*.fq.gz'
]
}
withName: BBMAP_BBDUK {
ext.args = [
"entropy=${params.shortread_complexityfilter_entropy}",

View file

@ -28,7 +28,9 @@ params {
perform_longread_qc = false
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_malt = true

View file

@ -0,0 +1,46 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset skipping all preprocessing to check pipeline function'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_clipmerge = false
perform_longread_clip = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_malt = true
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
}
process {
withName: MALT_RUN {
maxForks = 1
}
}

View file

@ -0,0 +1,46 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset without performing any profiling to check pipeline function'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_clipmerge = true
perform_longread_clip = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
}
process {
withName: MALT_RUN {
maxForks = 1
}
}

View file

@ -183,21 +183,21 @@ Complexity filtering can be activated via the `--perform_shortread_complexityfil
Complexity filtering is primarily a run-time optimisation step. It is not necessary for accurate taxonomic profiling, however it can speed up run-time of each tool by removing reads with low-diversity of nucleotides (e.g. with mono-nucleotide - `AAAAAAAA`, or di-nucleotide repeats `GAGAGAGAGAGAGAG`) that have a low-chance of giving an informative taxonomic ID as they can be associated with many different taxa. Removing these reads therefore saves computational time and resources.
There are currently two options for short-read complexity filtering: [`bbduk`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/) and [`prinseq++`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/).
There are currently three options for short-read complexity filtering: [`bbduk`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/), [`prinseq++`](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus), and [`fastp`](https://github.com/OpenGene/fastp#low-complexity-filter).
The tools offer different algorithms and parameters for removing low complexity reads. We therefore recommend reviewing the pipeline's [parameter documentation](https://nf-co.re/taxprofiler/parameters) and the documentation of both tools (see links above) to decide on optimal methods and parameters for your dataset.
The tools offer different algorithms and parameters for removing low complexity reads. We therefore recommend reviewing the pipeline's [parameter documentation](https://nf-co.re/taxprofiler/parameters) and the documentation of the tools (see links above) to decide on optimal methods and parameters for your dataset.
You can optionally save the FASTQ output of the run merging with the `--save_complexityfiltered_reads`.
You can optionally save the FASTQ output of the run merging with the `--save_complexityfiltered_reads`. If running with `fastp`, complexity filtering happens inclusively within the earlier shortread preprocessing step. Therefore there will not be an independent pipeline step for complexity filtering, and no independent FASTQ file (i.e. `--save_complexityfiltered_reads` will be ignored) - your complexity filtered reads will also be in the `fastp/` folder in the same file(s) as the preprocessed read.
#### Host Removal
Removal of possible-host reads from FASTQ files prior profiling can be activated with `--perform_shortread_hostremoval`
Removal of possible-host reads from FASTQ files prior profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`.
Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these prior typically resource-heavy profiling with more efficient methods. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases.
nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2, and the use of the unaligned reads for downstream profiling.
You can supply your reference genome in FASTA format with `--shortread_hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used.
You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index` or `--longread_hostremoval_index`, however nf-core/taxprofiler will generate this for you if necessary. Pre-supplying the directory of index files can greatly speed up the process, and these can be re-used.
> 💡 If you have multiple taxa or sequences you wish to remove (e.g., the host genome and then also PhiX - common quality-control reagent during sequencing) you can simply concatenate the FASTAs of each taxa or sequences into a single reference file.

View file

@ -55,6 +55,12 @@
"git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
},
"metaphlan3": {
"git_sha": "ed4dd1a928ebf4308efb720de878045f7773f8e2"
},
"minimap2/align": {
"git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b"
},
"minimap2/index": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"multiqc": {
@ -66,6 +72,12 @@
"prinseqplusplus": {
"git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b"
},
"samtools/bam2fq": {
"git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159"
},
"samtools/view": {
"git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce"
},
"untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
}

View file

@ -23,7 +23,7 @@ process METAPHLAN3 {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_type = ("$input".endsWith(".fastq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
def input_type = ("$input".endsWith(".fastq.gz") || "$input".endsWith(".fq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"

View file

@ -0,0 +1,48 @@
process MINIMAP2_ALIGN {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? 'bioconda::minimap2=2.21 bioconda::samtools=1.12' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' :
'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }"
input:
tuple val(meta), path(reads)
path reference
val bam_format
val cigar_paf_format
val cigar_bam
output:
tuple val(meta), path("*.paf"), optional: true, emit: paf
tuple val(meta), path("*.bam"), optional: true, emit: bam
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_reads = meta.single_end ? "$reads" : "${reads[0]} ${reads[1]}"
def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
"""
minimap2 \\
$args \\
-t $task.cpus \\
$reference \\
$input_reads \\
$cigar_paf \\
$set_cigar_bam \\
$bam_output
cat <<-END_VERSIONS > versions.yml
"${task.process}":
minimap2: \$(minimap2 --version 2>&1)
END_VERSIONS
"""
}

View file

@ -0,0 +1,65 @@
name: minimap2_align
description: A versatile pairwise aligner for genomic and spliced nucleotide sequences
keywords:
- align
- fasta
- fastq
- genome
- paf
- reference
tools:
- minimap2:
description: |
A versatile pairwise aligner for genomic and spliced nucleotide sequences.
homepage: https://github.com/lh3/minimap2
documentation: https://github.com/lh3/minimap2#uguide
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FASTA or FASTQ files of size 1 and 2 for single-end
and paired-end data, respectively.
- reference:
type: file
description: |
Reference database in FASTA format.
- bam_format:
type: boolean
description: Specify that output should be in BAM format
- cigar_paf_format:
type: boolean
description: Specify that output CIGAR should be in PAF format
- cigar_bam:
type: boolean
description: |
Write CIGAR with >65535 ops at the CG tag. This is recommended when
doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations)
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- paf:
type: file
description: Alignment in PAF format
pattern: "*.paf"
- bam:
type: file
description: Alignment in BAM format
pattern: "*.bam"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@heuermh"
- "@sofstam"
- "@sateeshperi"
- "@jfy133"

View file

@ -0,0 +1,33 @@
process MINIMAP2_INDEX {
label 'process_medium'
conda (params.enable_conda ? 'bioconda::minimap2=2.21' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/minimap2:2.21--h5bf99c6_0' :
'quay.io/biocontainers/minimap2:2.21--h5bf99c6_0' }"
input:
path fasta
output:
path "*.mmi" , emit: index
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
"""
minimap2 \\
-t $task.cpus \\
-d ${fasta.baseName}.mmi \\
$args \\
$fasta
cat <<-END_VERSIONS > versions.yml
"${task.process}":
minimap2: \$(minimap2 --version 2>&1)
END_VERSIONS
"""
}

View file

@ -0,0 +1,30 @@
name: minimap2_index
description: Provides fasta index required by minimap2 alignment.
keywords:
- index
- fasta
- reference
tools:
- minimap2:
description: |
A versatile pairwise aligner for genomic and spliced nucleotide sequences.
homepage: https://github.com/lh3/minimap2
documentation: https://github.com/lh3/minimap2#uguide
licence: ["MIT"]
input:
- fasta:
type: file
description: |
Reference database in FASTA format.
output:
- mmi:
type: file
description: Minimap2 fasta index.
pattern: "*.mmi"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@yuukiiwa"
- "@drpatelh"

View file

@ -0,0 +1,56 @@
process SAMTOOLS_BAM2FQ {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(inputbam)
val split
output:
tuple val(meta), path("*.fq.gz"), emit: reads
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
if (split){
"""
samtools \\
bam2fq \\
$args \\
-@ $task.cpus \\
-1 ${prefix}_1.fq.gz \\
-2 ${prefix}_2.fq.gz \\
-0 ${prefix}_other.fq.gz \\
-s ${prefix}_singleton.fq.gz \\
$inputbam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
} else {
"""
samtools \\
bam2fq \\
$args \\
-@ $task.cpus \\
$inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}
}

View file

@ -0,0 +1,55 @@
name: samtools_bam2fq
description: |
The module uses bam2fq method from samtools to
convert a SAM, BAM or CRAM file to FASTQ format
keywords:
- bam2fq
- samtools
- fastq
tools:
- samtools:
description: Tools for dealing with SAM, BAM and CRAM files
homepage: None
documentation: http://www.htslib.org/doc/1.1/samtools.html
tool_dev_url: None
doi: ""
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- inputbam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- split:
type: boolean
description: |
TRUE/FALSE value to indicate if reads should be separated into
/1, /2 and if present other, or singleton.
Note: choosing TRUE will generate 4 different files.
Choosing FALSE will produce a single file, which will be interleaved in case
the input contains paired reads.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: |
FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
or a single interleaved .fq.gz file if the user chooses not to split the reads.
pattern: "*.fq.gz"
authors:
- "@lescai"

View file

@ -0,0 +1,56 @@
process SAMTOOLS_VIEW {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
tuple val(meta), path(input), path(index)
path fasta
output:
tuple val(meta), path("*.bam") , emit: bam , optional: true
tuple val(meta), path("*.cram"), emit: cram, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def reference = fasta ? "--reference ${fasta} -C" : ""
def file_type = input.getExtension()
if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
samtools \\
view \\
--threads ${task.cpus-1} \\
${reference} \\
$args \\
$input \\
$args2 \\
> ${prefix}.${file_type}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bam
touch ${prefix}.cram
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

View file

@ -0,0 +1,57 @@
name: samtools_view
description: filter/convert SAM/BAM/CRAM file
keywords:
- view
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: hhttp://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- index:
type: optional file
description: BAM.BAI/CRAM.CRAI file
pattern: "*.{.bai,.crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: filtered/converted BAM/SAM file
pattern: "*.{bam,sam}"
- cram:
type: file
description: filtered/converted CRAM file
pattern: "*.cram"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@joseespinosa"
- "@FriederikeHanssen"

View file

@ -81,6 +81,7 @@ params {
shortread_complexityfilter_bbduk_mask = false
shortread_complexityfilter_prinseqplusplus_mode = 'entropy'
shortread_complexityfilter_prinseqplusplus_dustscore = 0.5
shortread_complexityfilter_fastp_threshold = 30
save_complexityfiltered_reads = false
// run merging
@ -88,12 +89,15 @@ params {
save_runmerged_reads = false
// Host Removal
perform_shortread_hostremoval = false
shortread_hostremoval_reference = null
shortread_hostremoval_index = null
save_hostremoval_index = false
save_hostremoval_mapped = false
save_hostremoval_unmapped = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
hostremoval_reference = null
shortread_hostremoval_index = null
longread_hostremoval_index = null
save_hostremoval_index = false
save_hostremoval_mapped = false
save_hostremoval_unmapped = false
// MALT
run_malt = false
@ -133,11 +137,11 @@ try {
// Load nf-core/taxprofiler custom profiles from different institutions.
// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs!
// try {
// includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config"
// } catch (Exception e) {
// System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config")
// }
try {
includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config"
} catch (Exception e) {
System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config")
}
profiles {
@ -189,6 +193,8 @@ profiles {
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
test_noprofiling { includeConfig 'conf/test_noprofiling.config' }
test_nopreprocessing { includeConfig 'conf/test_preprocessing.config' }
}
// Load igenomes.config if required
@ -228,7 +234,7 @@ trace {
}
dag {
enabled = true
file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg"
file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
}
manifest {

View file

@ -319,7 +319,8 @@
},
"shortread_complexityfilter_tool": {
"type": "string",
"default": "bbduk"
"default": "bbduk",
"enum": ["bbduk", "prinseqplusplus", "fastp"]
},
"shortread_complexityfilter_bbduk_windowsize": {
"type": "integer",
@ -362,7 +363,10 @@
"perform_shortread_hostremoval": {
"type": "boolean"
},
"shortread_hostremoval_reference": {
"perform_longread_hostremoval": {
"type": "boolean"
},
"hostremoval_reference": {
"type": "string",
"default": "None"
},
@ -398,6 +402,14 @@
"default": "tsv",
"enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"]
},
"longread_hostremoval_index": {
"type": "string",
"default": "None"
},
"shortread_complexityfilter_fastp_threshold": {
"type": "integer",
"default": 30
},
"longread_qc_run_clip": {
"type": "boolean"
},
@ -415,6 +427,5 @@
"longread_qc_targetnbases": {
"type": "integer",
"default": 500000000
}
}
}

View file

@ -0,0 +1,47 @@
//
// Remove host reads via alignment and export off-target reads
//
include { MINIMAP2_INDEX } from '../../modules/nf-core/modules/minimap2/index/main'
include { MINIMAP2_ALIGN } from '../../modules/nf-core/modules/minimap2/align/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/modules/samtools/view/main'
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main'
workflow LONGREAD_HOSTREMOVAL {
take:
reads // [ [ meta ], [ reads ] ]
reference // /path/to/fasta
index // /path/to/index
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( !params.longread_hostremoval_index ) {
ch_minimap2_index = MINIMAP2_INDEX ( reference ).index
ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions )
} else {
ch_minimap2_index = index
}
MINIMAP2_ALIGN ( reads, ch_minimap2_index, true, false, false )
ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions.first() )
ch_minimap2_mapped = MINIMAP2_ALIGN.out.bam
.map {
meta, reads ->
[ meta, reads, [] ]
}
SAMTOOLS_VIEW ( ch_minimap2_mapped , [] )
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )
SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false )
ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() )
emit:
reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
}

View file

@ -48,7 +48,7 @@ workflow PROFILING {
}
/*
PREPARE PROFILER INPUT CHANNELS
PREPARE PROFILER INPUT CHANNELS & RUN PROFILING
*/
// Each tool as a slightly different input structure and generally separate
@ -56,74 +56,27 @@ workflow PROFILING {
// for each tool and make liberal use of multiMap to keep reads/databases
// channel element order in sync with each other
// MALT: We groupTuple to have all samples in one channel for MALT as database
// loading takes a long time, so we only want to run it once per database
// TODO document somewhere we only accept illumina short reads for MALT?
ch_input_for_malt = ch_input_for_profiling.malt
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
.map {
it ->
def temp_meta = [ id: it[2]['db_name']] + it[2]
def db = it[3]
[ temp_meta, it[1], db ]
}
.groupTuple(by: [0,2])
.multiMap {
it ->
reads: [ it[0], it[1].flatten() ]
db: it[2]
}
// All subsequent tools can easily run on a per-sample basis
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
db: it[3]
}
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] Centrifuge currently does not accept FASTA files as input. Skipping Centrifuge for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
db: it[3]
}
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
ch_input_for_kaiju = ch_input_for_profiling.kaiju
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
ch_input_for_diamond = ch_input_for_profiling.diamond
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
/*
RUN PROFILING
*/
if ( params.run_malt ) {
// MALT: We groupTuple to have all samples in one channel for MALT as database
// loading takes a long time, so we only want to run it once per database
// TODO document somewhere we only accept illumina short reads for MALT?
ch_input_for_malt = ch_input_for_profiling.malt
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
.map {
it ->
def temp_meta = [ id: it[2]['db_name']] + it[2]
def db = it[3]
[ temp_meta, it[1], db ]
}
.groupTuple(by: [0,2])
.multiMap {
it ->
reads: [ it[0], it[1].flatten() ]
db: it[2]
}
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
ch_maltrun_for_megan = MALT_RUN.out.rma6
@ -143,40 +96,94 @@ workflow PROFILING {
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
}
if ( params.run_kraken2 ) {
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
db: it[3]
}
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
}
if ( params.run_centrifuge ) {
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] Centrifuge currently does not accept FASTA files as input. Skipping Centrifuge for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
db: it[3]
}
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
}
if ( params.run_metaphlan3 ) {
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
}
if ( params.run_kaiju ) {
ch_input_for_kaiju = ch_input_for_profiling.kaiju
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
}
if ( params.run_diamond ) {
ch_input_for_diamond = ch_input_for_profiling.diamond
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format )
ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output )
}
emit:

View file

@ -13,6 +13,7 @@ workflow SHORTREAD_COMPLEXITYFILTERING {
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
// fastp complexity filtering is activated via modules.conf in shortread_preprocessing
if ( params.shortread_complexityfilter_tool == 'bbduk' ) {
ch_filtered_reads = BBMAP_BBDUK ( reads, [] ).reads
ch_versions = ch_versions.mix( BBMAP_BBDUK.out.versions.first() )

View file

@ -11,7 +11,7 @@ WorkflowTaxprofiler.initialise(params, log)
// TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist
def checkPathParamList = [ params.input, params.databases, params.shortread_hostremoval_reference,
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
params.shortread_hostremoval_index, params.multiqc_config
]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
@ -20,16 +20,18 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-short reads. Pairs will be profiled as separate files."
if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) & !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_long_qc"
if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_clipmerge == false || params.shortread_clipmerge_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_clipmerge and/or --shortread_clipmerge_tool 'fastp'"
if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) }
if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] }
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." }
if (params.hostremoval_reference ) { ch_reference = file(params.hostremoval_reference) }
if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] }
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -49,12 +51,13 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { DB_CHECK } from '../subworkflows/local/db_check'
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval'
include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval'
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
include { PROFILING } from '../subworkflows/local/profiling'
@ -132,7 +135,8 @@ workflow TAXPROFILER {
SUBWORKFLOW: COMPLEXITY FILTERING
*/
if ( params.perform_shortread_complexityfilter ) {
// fastp complexity filtering is activated via modules.conf in shortread_preprocessing
if ( params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool != 'fastp' ) {
ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
} else {
@ -144,16 +148,23 @@ workflow TAXPROFILER {
*/
if ( params.perform_shortread_hostremoval ) {
ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_reference_index ).reads
ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_shortread_reference_index ).reads
ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions)
} else {
ch_shortreads_hostremoved = ch_shortreads_filtered
}
if ( params.perform_longread_hostremoval ) {
ch_longreads_hostremoved = LONGREAD_HOSTREMOVAL ( ch_longreads_preprocessed, ch_reference, ch_longread_reference_index ).reads
ch_versions = ch_versions.mix(LONGREAD_HOSTREMOVAL.out.versions)
} else {
ch_longreads_hostremoved = ch_longreads_preprocessed
}
if ( params.perform_runmerging ) {
ch_reads_for_cat_branch = ch_shortreads_hostremoved
.mix( ch_longreads_preprocessed )
.mix( ch_longreads_hostremoved )
.map {
meta, reads ->
def meta_new = meta.clone()
@ -185,7 +196,7 @@ workflow TAXPROFILER {
} else {
ch_reads_runmerged = ch_shortreads_hostremoved
.mix( ch_longreads_preprocessed, INPUT_CHECK.out.fasta )
.mix( ch_longreads_hostremoved, INPUT_CHECK.out.fasta )
}
/*
@ -222,7 +233,7 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
}
if (params.perform_shortread_complexityfilter){
if (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool != 'fastp'){
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
}