mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-13 07:23:10 +00:00
Merge pull request #34 from nf-core/adapterremoval
Add AdapterRemoval as alternative trimmer/merger
This commit is contained in:
commit
1dfbcacf68
15 changed files with 346 additions and 35 deletions
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
|
@ -28,6 +28,10 @@ jobs:
|
|||
# Test latest edge release of Nextflow
|
||||
- NXF_VER: ""
|
||||
NXF_EDGE: "1"
|
||||
parameters:
|
||||
- "--shortread_clipmerge_tool fastp"
|
||||
- "--shortread_clipmerge_tool adapterremoval"
|
||||
|
||||
steps:
|
||||
- name: Check out pipeline code
|
||||
uses: actions/checkout@v2
|
||||
|
@ -47,6 +51,6 @@ jobs:
|
|||
# For example: adding multiple test runs with different parameters
|
||||
# Remember that you can parallelise this by using strategy.matrix
|
||||
run: |
|
||||
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
|
||||
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
|
||||
|
||||
#
|
||||
|
|
19
CITATIONS.md
19
CITATIONS.md
|
@ -13,9 +13,26 @@
|
|||
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
|
||||
|
||||
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
|
||||
|
||||
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
|
||||
|
||||
* [Porechop](https://github.com/rrwick/Porechop)
|
||||
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
|
||||
|
||||
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor.” Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
|
||||
|
||||
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
|
||||
|
||||
> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. “AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging.” BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
|
||||
|
||||
- [Porechop](https://github.com/rrwick/Porechop)
|
||||
|
||||
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
|
||||
|
||||
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
|
||||
|
||||
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
|
||||
|
||||
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
|
|
|
@ -62,18 +62,15 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: FASTP {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
withName: FASTP_SINGLE {
|
||||
ext.args = [
|
||||
// collapsing options - option to retain singletons
|
||||
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
|
||||
// trimming options
|
||||
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
||||
!{ ${meta.single_end} } && params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : !{ ${meta.single_end} } ? "--detect_adapter_for_pe" : "",
|
||||
// filtering options
|
||||
"--length_required ${params.shortread_clipmerge_minlength}"
|
||||
].join(' ').trim()
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/fastp" },
|
||||
mode: 'copy',
|
||||
|
@ -81,6 +78,61 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: FASTP_PAIRED {
|
||||
ext.args = [
|
||||
// collapsing options - option to retain singletons
|
||||
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
|
||||
// trimming options
|
||||
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
||||
params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
|
||||
// filtering options
|
||||
"--length_required ${params.shortread_clipmerge_minlength}"
|
||||
].join(' ').trim()
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/fastp" },
|
||||
mode: 'copy',
|
||||
pattern: '*.fastq.gz'
|
||||
]
|
||||
}
|
||||
|
||||
// Command-line arguments, output prefix and publishing rules for the single-end
// AdapterRemoval invocation.
withName: ADAPTERREMOVAL_SINGLE {
    ext.args = [
        // trimming options
        // Skipping adapter trimming takes priority over a user-supplied adapter, so the
        // command line never carries two conflicting --adapter1 values.
        params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
        !params.shortread_clipmerge_skipadaptertrim && params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
        // filtering options
        "--minlength ${params.shortread_clipmerge_minlength}"
    ].join(' ').trim()
    // One output basename per sample and sequencing run
    ext.prefix = { "${meta.id}_${meta.run_accession}" }
    // NOTE(review): the ADAPTERREMOVAL module declares outputs named *.truncated.gz,
    // *.collapsed.gz etc., which '*.fastq.gz' does not appear to match -- confirm that
    // publishing nothing from this process is intended.
    publishDir = [
        path: { "${params.outdir}/adapterremoval" },
        mode: 'copy',
        pattern: '*.fastq.gz'
    ]
}
|
||||
|
||||
// Command-line arguments, output prefix and publishing rules for the paired-end
// AdapterRemoval invocation.
withName: ADAPTERREMOVAL_PAIRED {
    ext.args = [
        // collapsing options
        params.shortread_clipmerge_mergepairs ? "--collapse" : "",
        // trimming options
        // Skipping adapter trimming takes priority over user-supplied adapters, so the
        // command line never carries two conflicting --adapter1 / --adapter2 values.
        params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
        !params.shortread_clipmerge_skipadaptertrim && params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
        !params.shortread_clipmerge_skipadaptertrim && params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
        // filtering options
        "--minlength ${params.shortread_clipmerge_minlength}"
    ].join(' ').trim()
    // One output basename per sample and sequencing run
    ext.prefix = { "${meta.id}_${meta.run_accession}" }
    // NOTE(review): the ADAPTERREMOVAL module declares outputs named *.truncated.gz,
    // *.collapsed.gz etc., which '*.fastq.gz' does not appear to match -- confirm that
    // publishing nothing from this process is intended.
    publishDir = [
        path: { "${params.outdir}/adapterremoval" },
        mode: 'copy',
        pattern: '*.fastq.gz'
    ]
}
|
||||
|
||||
|
||||
withName: PORECHOP {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
publishDir = [
|
||||
|
@ -99,23 +151,23 @@ process {
|
|||
}
|
||||
|
||||
withName: MALT_RUN {
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/malt/${meta.db_name}" },
|
||||
mode: 'copy',
|
||||
pattern: '*.{rma6,tab,text,sam,log}'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
}
|
||||
|
||||
withName: KRAKEN2_KRAKEN2 {
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/kraken2/${meta.db_name}" },
|
||||
mode: 'copy',
|
||||
pattern: '*.{fastq.gz,txt}'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
}
|
||||
|
||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
"homePage": "https://github.com/nf-core/taxprofiler",
|
||||
"repos": {
|
||||
"nf-core/modules": {
|
||||
"adapterremoval": {
|
||||
"git_sha": "f0800157544a82ae222931764483331a81812012"
|
||||
},
|
||||
"cat/fastq": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
|
|
70
modules/nf-core/modules/adapterremoval/main.nf
generated
Normal file
70
modules/nf-core/modules/adapterremoval/main.nf
generated
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Runs AdapterRemoval on one sample's reads.
//
// Inputs:
//   meta        - sample map; `meta.id` is used for tagging and as the default output
//                 prefix, `meta.single_end` selects the single- or paired-end command.
//   reads       - one FastQ file (single-end) or two (paired-end; reads[0] and reads[1]).
//   adapterlist - optional adapter list file; when empty no --adapter-list option is passed.
//
// Every read output is optional; only the log file and versions.yml are always emitted.
process ADAPTERREMOVAL {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
        'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"

    input:
    tuple val(meta), path(reads)
    path(adapterlist)

    output:
    tuple val(meta), path("${prefix}.truncated.gz")          , optional: true, emit: singles_truncated
    tuple val(meta), path("${prefix}.discarded.gz")          , optional: true, emit: discarded
    tuple val(meta), path("${prefix}.pair1.truncated.gz")    , optional: true, emit: pair1_truncated
    tuple val(meta), path("${prefix}.pair2.truncated.gz")    , optional: true, emit: pair2_truncated
    tuple val(meta), path("${prefix}.collapsed.gz")          , optional: true, emit: collapsed
    tuple val(meta), path("${prefix}.collapsed.truncated.gz"), optional: true, emit: collapsed_truncated
    tuple val(meta), path("${prefix}.paired.gz")             , optional: true, emit: paired_interleaved
    tuple val(meta), path('*.log')                           , emit: log
    path "versions.yml"                                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Only build the option when an adapter list was supplied. The command below must
    // interpolate `list` (flag + file), not the bare `adapterlist` path, otherwise the file
    // name would be handed to AdapterRemoval without its --adapter-list flag.
    def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
    // Deliberately not `def`: the output block above resolves `${prefix}` as well.
    prefix = task.ext.prefix ?: "${meta.id}"

    if (meta.single_end) {
        """
        AdapterRemoval \\
            --file1 $reads \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    } else {
        """
        AdapterRemoval \\
            --file1 ${reads[0]} \\
            --file2 ${reads[1]} \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    }

}
|
90
modules/nf-core/modules/adapterremoval/meta.yml
generated
Normal file
90
modules/nf-core/modules/adapterremoval/meta.yml
generated
Normal file
|
@ -0,0 +1,90 @@
|
|||
name: adapterremoval
|
||||
description: Trim sequencing adapters and collapse overlapping reads
|
||||
keywords:
|
||||
- trimming
|
||||
- adapters
|
||||
- merging
|
||||
- fastq
|
||||
tools:
|
||||
- adapterremoval:
|
||||
description: The AdapterRemoval v2 tool for merging and clipping reads.
|
||||
homepage: https://github.com/MikkelSchubert/adapterremoval
|
||||
documentation: https://adapterremoval.readthedocs.io
|
||||
licence: ["GPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
|
||||
- adapterlist:
|
||||
type: file
|
||||
description: Optional text file containing a list of adapters to search for and remove,
|
||||
with one adapter per line. Otherwise will look for default adapters (see
|
||||
AdapterRemoval man page), or can be modified to remove user-specified
|
||||
adapters via ext.args.
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- singles_truncated:
|
||||
type: file
|
||||
description: |
|
||||
Adapter trimmed FastQ files of either single-end reads, or singleton
|
||||
'orphaned' reads from merging of paired-end data (i.e., one of the pair
|
||||
was lost due to filtering thresholds).
|
||||
pattern: "*.truncated.gz"
|
||||
- discarded:
|
||||
type: file
|
||||
description: |
|
||||
Adapter trimmed FastQ files of reads that did not pass filtering
|
||||
thresholds.
|
||||
pattern: "*.discarded.gz"
|
||||
- pair1_truncated:
|
||||
type: file
|
||||
description: |
|
||||
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
|
||||
with their respective R2 pair due to long templates. The respective pair
|
||||
is stored in 'pair2_truncated'.
|
||||
pattern: "*.pair1.truncated.gz"
|
||||
- pair2_truncated:
|
||||
type: file
|
||||
description: |
|
||||
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
|
||||
with their respective R1 pair due to long templates. The respective pair
|
||||
is stored in 'pair1_truncated'.
|
||||
pattern: "*.pair2.truncated.gz"
|
||||
- collapsed:
|
||||
type: file
|
||||
description: |
|
||||
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||
respective R1 pair but were not trimmed.
|
||||
pattern: "*.collapsed.gz"
|
||||
- collapsed_truncated:
|
||||
type: file
|
||||
description: |
|
||||
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||
respective R1 pair and were trimmed of adapter due to sufficient overlap.
|
||||
pattern: "*.collapsed.truncated.gz"
|
||||
- log:
|
||||
type: file
|
||||
description: AdapterRemoval log file
|
||||
pattern: "*.log"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
|
||||
authors:
|
||||
- "@maxibor"
|
||||
- "@jfy133"
|
|
@ -59,7 +59,7 @@ params {
|
|||
shortread_clipmerge_tool = 'fastp'
|
||||
shortread_clipmerge_skipadaptertrim = false
|
||||
shortread_clipmerge_mergepairs = false
|
||||
shortread_clipmerge_excludeunmerged = true
|
||||
shortread_clipmerge_excludeunmerged = false
|
||||
shortread_clipmerge_adapter1 = null
|
||||
shortread_clipmerge_adapter2 = null
|
||||
shortread_clipmerge_minlength = 15
|
||||
|
|
|
@ -267,7 +267,7 @@
|
|||
},
|
||||
"shortread_clipmerge_excludeunmerged": {
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
"default": false
|
||||
},
|
||||
"longread_clip": {
|
||||
"type": "boolean"
|
||||
|
@ -284,7 +284,8 @@
|
|||
},
|
||||
"shortread_clipmerge_tool": {
|
||||
"type": "string",
|
||||
"default": "fastp"
|
||||
"default": "fastp",
|
||||
"enum": ["fastp", "adapterremoval"]
|
||||
},
|
||||
"shortread_clipmerge_skipadaptertrim": {
|
||||
"type": "boolean"
|
||||
|
@ -294,11 +295,11 @@
|
|||
},
|
||||
"shortread_clipmerge_adapter1": {
|
||||
"type": "string",
|
||||
"default": null
|
||||
"default": "None"
|
||||
},
|
||||
"shortread_clipmerge_adapter2": {
|
||||
"type": "string",
|
||||
"default": null
|
||||
"default": "None"
|
||||
},
|
||||
"shortread_clipmerge_minlength": {
|
||||
"type": "integer",
|
||||
|
|
|
@ -18,9 +18,7 @@ workflow DB_CHECK {
|
|||
parsed_samplesheet = DATABASE_CHECK ( dbsheet )
|
||||
.csv
|
||||
.splitCsv ( header:true, sep:',' )
|
||||
.dump(tag: "db_split_csv_out")
|
||||
.map { create_db_channels(it) }
|
||||
.dump(tag: "db_channel_prepped")
|
||||
|
||||
ch_dbs_for_untar = parsed_samplesheet
|
||||
.branch {
|
||||
|
|
|
@ -12,7 +12,6 @@ workflow INPUT_CHECK {
|
|||
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
|
||||
.csv
|
||||
.splitCsv ( header:true, sep:',' )
|
||||
.dump(tag: "input_split_csv_out")
|
||||
.branch {
|
||||
fasta: it['fasta'] != ''
|
||||
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
|
||||
|
@ -21,17 +20,14 @@ workflow INPUT_CHECK {
|
|||
|
||||
parsed_samplesheet.fastq
|
||||
.map { create_fastq_channel(it) }
|
||||
.dump(tag: "fastq_channel_init")
|
||||
.set { fastq }
|
||||
|
||||
parsed_samplesheet.nanopore
|
||||
.map { create_fastq_channel(it) }
|
||||
.dump(tag: "fastq_nanopore_channel_init")
|
||||
.set { nanopore }
|
||||
|
||||
parsed_samplesheet.fasta
|
||||
.map { create_fasta_channel(it) }
|
||||
.dump(tag: "fasta_channel_init")
|
||||
.set { fasta }
|
||||
|
||||
emit:
|
||||
|
|
|
@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
|
|||
PORECHOP ( reads )
|
||||
|
||||
ch_processed_reads = PORECHOP.out.reads
|
||||
.dump(tag: "pre_fastqc_check")
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
|
|
86
subworkflows/local/shortread_adapterremoval.nf
Normal file
86
subworkflows/local/shortread_adapterremoval.nf
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
Process short raw reads with AdapterRemoval
|
||||
*/
|
||||
|
||||
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
|
||||
|
||||
// Trims (and optionally merges) short reads with AdapterRemoval.
//
// take:  reads    - channel of [ meta, reads ] tuples
// emit:  reads    - processed reads, [ meta, reads ]
//        versions - versions.yml from the single- and paired-end runs
//        mqc      - AdapterRemoval log files collected for MultiQC
workflow SHORTREAD_ADAPTERREMOVAL {

    take:
    reads // [[meta], [reads]]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    // Route each sample to the single- or paired-end invocation based on its meta map
    ch_by_layout = reads
        .branch {
            single: it[0].single_end
            paired: !it[0].single_end
        }

    // No adapter list file is supplied to either invocation
    ADAPTERREMOVAL_SINGLE ( ch_by_layout.single, [] )
    ADAPTERREMOVAL_PAIRED ( ch_by_layout.paired, [] )

    // due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file
    // has to be exported in a separate channel, and we must manually recombine when necessary

    if ( !params.shortread_clipmerge_mergepairs ) {

        // No merging requested: re-pair the trimmed R1/R2 files per sample
        ch_trimmed_pairs = ADAPTERREMOVAL_PAIRED.out.pair1_truncated
            .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated )
            .groupTuple()
            .map { meta, pair1, pair2 -> [ meta, [ pair1, pair2 ].flatten() ] }

        ch_processed_reads = ch_trimmed_pairs
            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )

    } else {

        // Merging requested: the collapsed outputs are always kept ...
        ch_outputs_to_merge = ADAPTERREMOVAL_PAIRED.out.collapsed
            .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )

        // ... and the singleton / unmerged pair outputs are added unless they are excluded
        if ( !params.shortread_clipmerge_excludeunmerged ) {
            ch_outputs_to_merge = ch_outputs_to_merge
                .mix(
                    ADAPTERREMOVAL_PAIRED.out.singles_truncated,
                    ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
                    ADAPTERREMOVAL_PAIRED.out.pair2_truncated
                )
        }

        // Everything handed to CAT_FASTQ is flagged single-end and grouped per sample
        ch_grouped_for_cat = ch_outputs_to_merge
            .map { meta, files ->
                def meta_single = meta.clone()
                meta_single.single_end = true

                [ meta_single, files ]
            }
            .groupTuple()

        ch_processed_reads = CAT_FASTQ ( ch_grouped_for_cat ).reads
            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )

    }

    ch_versions = ch_versions
        .mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
        .mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )

    ch_multiqc_files = ch_multiqc_files
        .mix(
            ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
            ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
        )

    emit:
    reads    = ch_processed_reads // channel: [ val(meta), [ reads ] ]
    versions = ch_versions        // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}
|
|
@ -14,15 +14,11 @@ workflow SHORTREAD_FASTP {
|
|||
ch_multiqc_files = Channel.empty()
|
||||
|
||||
ch_input_for_fastp = reads
|
||||
.dump(tag: "pre-fastp_branch")
|
||||
.branch{
|
||||
single: it[0]['single_end'] == true
|
||||
paired: it[0]['single_end'] == false
|
||||
}
|
||||
|
||||
ch_input_for_fastp.single.dump(tag: "input_fastp_single")
|
||||
ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
|
||||
|
||||
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
||||
// Last parameter here turns on merging of PE data
|
||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
|
||||
|
@ -46,13 +42,11 @@ workflow SHORTREAD_FASTP {
|
|||
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
|
||||
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
|
||||
|
||||
ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped")
|
||||
ch_processed_reads = ch_fastp_reads_prepped
|
||||
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
|
||||
|
||||
ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final")
|
||||
|
||||
emit:
|
||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||
versions = ch_versions // channel: [ versions.yml ]
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
|
||||
include { SHORTREAD_FASTP } from './shortread_fastp'
|
||||
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
||||
|
||||
workflow SHORTREAD_PREPROCESSING {
|
||||
|
@ -18,6 +19,10 @@ workflow SHORTREAD_PREPROCESSING {
|
|||
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
||||
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
|
||||
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
|
||||
} else {
|
||||
ch_processed_reads = reads
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
|
|||
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
||||
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
||||
// Warn when pair merging and MALT are both enabled.
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
// Abort when exclusion of unmerged reads is requested without pair merging being switched on.
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot exclude unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
|
||||
|
||||
/*
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -126,7 +127,6 @@ workflow TAXPROFILER {
|
|||
ch_input_for_profiling = ch_shortreads_preprocessed
|
||||
.mix( ch_longreads_preprocessed )
|
||||
.combine(DB_CHECK.out.dbs)
|
||||
.dump(tag: "reads_plus_db_clean")
|
||||
.branch {
|
||||
malt: it[2]['tool'] == 'malt'
|
||||
kraken2: it[2]['tool'] == 'kraken2'
|
||||
|
@ -141,9 +141,7 @@ workflow TAXPROFILER {
|
|||
// loading takes a long time, so we only want to run it once per database
|
||||
// TODO document somewhere we only accept illumina short reads for MALT?
|
||||
ch_input_for_malt = ch_input_for_profiling.malt
|
||||
.dump(tag: "input_to_malt_prefilter")
|
||||
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
|
||||
.dump(tag: "input_to_malt_postfilter")
|
||||
.map {
|
||||
it ->
|
||||
def temp_meta = [ id: it[2]['db_name']] + it[2]
|
||||
|
@ -151,7 +149,6 @@ workflow TAXPROFILER {
|
|||
[ temp_meta, it[1], db ]
|
||||
}
|
||||
.groupTuple(by: [0,2])
|
||||
.dump(tag: "input_to_malt")
|
||||
.multiMap {
|
||||
it ->
|
||||
reads: [ it[0], it[1].flatten() ]
|
||||
|
@ -160,7 +157,6 @@ workflow TAXPROFILER {
|
|||
|
||||
// We can run Kraken2 one-by-one sample-wise
|
||||
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
|
||||
.dump(tag: "input_to_kraken")
|
||||
.multiMap {
|
||||
it ->
|
||||
reads: [ it[0] + it[2], it[1] ]
|
||||
|
|
Loading…
Reference in a new issue