mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 03:26:03 +00:00
Start working on adding adapterremoval
This commit is contained in:
parent
323883bd3e
commit
c4c93bd59d
7 changed files with 281 additions and 14 deletions
|
@ -3,6 +3,9 @@
|
||||||
"homePage": "https://github.com/nf-core/taxprofiler",
|
"homePage": "https://github.com/nf-core/taxprofiler",
|
||||||
"repos": {
|
"repos": {
|
||||||
"nf-core/modules": {
|
"nf-core/modules": {
|
||||||
|
"adapterremoval": {
|
||||||
|
"git_sha": "f0800157544a82ae222931764483331a81812012"
|
||||||
|
},
|
||||||
"cat/fastq": {
|
"cat/fastq": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
@ -32,4 +35,4 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
70
modules/nf-core/modules/adapterremoval/main.nf
generated
Normal file
70
modules/nf-core/modules/adapterremoval/main.nf
generated
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
// Runs AdapterRemoval on single-end or paired-end FastQ input.
//
// Inputs:
//   meta        - Groovy map with sample information; `meta.id` is used as the
//                 task tag and default output prefix, `meta.single_end` selects
//                 which command form is run.
//   reads       - one FastQ file (single-end) or two (paired-end).
//   adapterlist - optional adapter table; pass `[]` to omit it.
//
// Outputs are all optional except the settings log and versions.yml, because
// which files AdapterRemoval writes depends on the library layout and on the
// options supplied through `task.ext.args`.
process ADAPTERREMOVAL {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
        'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"

    input:
    tuple val(meta), path(reads)
    path(adapterlist)

    output:
    tuple val(meta), path("${prefix}.truncated.gz")            , optional: true, emit: singles_truncated
    tuple val(meta), path("${prefix}.discarded.gz")            , optional: true, emit: discarded
    tuple val(meta), path("${prefix}.pair1.truncated.gz")      , optional: true, emit: pair1_truncated
    tuple val(meta), path("${prefix}.pair2.truncated.gz")      , optional: true, emit: pair2_truncated
    tuple val(meta), path("${prefix}.collapsed.gz")            , optional: true, emit: collapsed
    tuple val(meta), path("${prefix}.collapsed.truncated.gz")  , optional: true, emit: collapsed_truncated
    tuple val(meta), path("${prefix}.paired.gz")               , optional: true, emit: paired_interleaved
    tuple val(meta), path('*.log')                             , emit: log
    path "versions.yml"                                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Only emit the flag when an adapter file was actually supplied; an empty
    // input (`[]`) is falsy and yields an empty string.
    def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
    // Not declared with `def` so that it is visible to the output block above.
    prefix = task.ext.prefix ?: "${meta.id}"

    if (meta.single_end) {
        """
        AdapterRemoval \\
            --file1 $reads \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    } else {
        """
        AdapterRemoval \\
            --file1 ${reads[0]} \\
            --file2 ${reads[1]} \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads $task.cpus \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    }

}
|
90
modules/nf-core/modules/adapterremoval/meta.yml
generated
Normal file
90
modules/nf-core/modules/adapterremoval/meta.yml
generated
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
name: adapterremoval
|
||||||
|
description: Trim sequencing adapters and collapse overlapping reads
|
||||||
|
keywords:
|
||||||
|
- trimming
|
||||||
|
- adapters
|
||||||
|
- merging
|
||||||
|
- fastq
|
||||||
|
tools:
|
||||||
|
- adapterremoval:
|
||||||
|
description: The AdapterRemoval v2 tool for merging and clipping reads.
|
||||||
|
homepage: https://github.com/MikkelSchubert/adapterremoval
|
||||||
|
documentation: https://adapterremoval.readthedocs.io
|
||||||
|
licence: ["GPL v3"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
|
||||||
|
- adapterlist:
|
||||||
|
type: file
|
||||||
|
description: Optional text file containing a list of adapters to look for and remove,
|
||||||
|
with one adapter per line. Otherwise will look for default adapters (see
|
||||||
|
AdapterRemoval man page), or can be modified to remove user-specified
|
||||||
|
adapters via ext.args.
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- singles_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed FastQ files of either single-end reads, or singleton
|
||||||
|
'orphaned' reads from merging of paired-end data (i.e., one of the pair
|
||||||
|
was lost due to filtering thresholds).
|
||||||
|
pattern: "*.truncated.gz"
|
||||||
|
- discarded:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed FastQ files of reads that did not pass filtering
|
||||||
|
thresholds.
|
||||||
|
pattern: "*.discarded.gz"
|
||||||
|
- pair1_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
|
||||||
|
with their respective R2 pair due to long templates. The respective pair
|
||||||
|
is stored in 'pair2_truncated'.
|
||||||
|
pattern: "*.pair1.truncated.gz"
|
||||||
|
- pair2_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
|
||||||
|
with their respective R1 pair due to long templates. The respective pair
|
||||||
|
is stored in 'pair1_truncated'.
|
||||||
|
pattern: "*.pair2.truncated.gz"
|
||||||
|
- collapsed:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||||
|
respective R1 pair but were not trimmed.
|
||||||
|
pattern: "*.collapsed.gz"
|
||||||
|
- collapsed_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||||
|
respective R1 pair and were trimmed of adapter due to sufficient overlap.
|
||||||
|
pattern: "*.collapsed.truncated.gz"
|
||||||
|
- log:
|
||||||
|
type: file
|
||||||
|
description: AdapterRemoval log file
|
||||||
|
pattern: "*.log"
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@maxibor"
|
||||||
|
- "@jfy133"
|
|
@ -10,7 +10,10 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"fa_icon": "fas fa-terminal",
|
"fa_icon": "fas fa-terminal",
|
||||||
"description": "Define where the pipeline should find input data and save output data.",
|
"description": "Define where the pipeline should find input data and save output data.",
|
||||||
"required": ["input", "outdir"],
|
"required": [
|
||||||
|
"input",
|
||||||
|
"outdir"
|
||||||
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"input": {
|
"input": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -173,7 +176,14 @@
|
||||||
"description": "Method used to save pipeline results to output directory.",
|
"description": "Method used to save pipeline results to output directory.",
|
||||||
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
|
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
|
||||||
"fa_icon": "fas fa-copy",
|
"fa_icon": "fas fa-copy",
|
||||||
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
|
"enum": [
|
||||||
|
"symlink",
|
||||||
|
"rellink",
|
||||||
|
"link",
|
||||||
|
"copy",
|
||||||
|
"copyNoFollow",
|
||||||
|
"move"
|
||||||
|
],
|
||||||
"hidden": true
|
"hidden": true
|
||||||
},
|
},
|
||||||
"email_on_fail": {
|
"email_on_fail": {
|
||||||
|
@ -284,7 +294,11 @@
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_tool": {
|
"shortread_clipmerge_tool": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "fastp"
|
"default": "fastp",
|
||||||
|
"enum": [
|
||||||
|
"fastp",
|
||||||
|
"adapterremoval"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_skipadaptertrim": {
|
"shortread_clipmerge_skipadaptertrim": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
|
@ -294,15 +308,15 @@
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_adapter1": {
|
"shortread_clipmerge_adapter1": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": null
|
"default": "None"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_adapter2": {
|
"shortread_clipmerge_adapter2": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": null
|
"default": "None"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_minlength": {
|
"shortread_clipmerge_minlength": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"default": 15
|
"default": 15
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
91
subworkflows/local/shortread_adapterremoval.nf
Normal file
91
subworkflows/local/shortread_adapterremoval.nf
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
/*
|
||||||
|
Process short raw reads with AdapterRemoval
|
||||||
|
*/
|
||||||
|
|
||||||
|
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||||
|
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||||
|
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
|
||||||
|
|
||||||
|
// Clip (and optionally merge) short reads with AdapterRemoval.
//
// take:
//   reads - channel of [ meta, [ reads ] ]; `meta.single_end` must be a boolean.
//
// emit:
//   reads    - channel of [ meta, [ reads ] ] after processing
//   versions - channel of versions.yml files
//   mqc      - AdapterRemoval settings logs collected for MultiQC
//
// Behaviour of the paired-end branch depends on two pipeline parameters:
//   params.shortread_clipmerge_mergepairs      - concatenate collapsed reads into one file per sample
//   params.shortread_clipmerge_excludeunmerged - when merging, drop reads that did not collapse
workflow SHORTREAD_ADAPTERREMOVAL {

    take:
    reads // [[meta], [reads]]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    // Route each sample to the matching process alias by library layout.
    ch_input_for_adapterremoval = reads
        .dump(tag: "pre_adapterremoval_branch")
        .branch{
            single: it[0]['single_end'] == true
            paired: it[0]['single_end'] == false
        }

    // No adapter list is supplied; adapters come from defaults or ext.args.
    ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
    ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )

    if ( params.shortread_clipmerge_mergepairs ) {

        // Collapsed reads are always kept when merging is requested.
        ch_merged_parts = ADAPTERREMOVAL_PAIRED.out.collapsed
            .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )

        // Unless explicitly excluded, also keep reads that could not be collapsed.
        if ( !params.shortread_clipmerge_excludeunmerged ) {
            ch_merged_parts = ch_merged_parts
                .mix(
                    ADAPTERREMOVAL_PAIRED.out.singles_truncated,
                    ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
                    ADAPTERREMOVAL_PAIRED.out.pair2_truncated
                )
        }

        ch_adapterremoval_for_cat = ch_merged_parts
            .map {
                meta, reads ->
                    def meta_new = meta.clone()
                    // Must be a boolean: branches in this pipeline test
                    // `single_end == true`, which an integer 1 does not satisfy.
                    meta_new['single_end'] = true

                    [ meta_new, reads ]
            }
            .groupTuple(by: 0)

        // Concatenate all retained files of a sample into a single FastQ.
        ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads

        ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe
            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )

    } else {

        // No merging: re-pair the trimmed R1/R2 files into [ meta, [ r1, r2 ] ].
        ch_adapterremoval_reads_prepped_pe = ADAPTERREMOVAL_PAIRED.out.pair1_truncated
            .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated )
            .dump(tag: "pre-group")
            .groupTuple(by: 0)
            .dump(tag: "post-group")
            .map { meta, pair1, pair2 ->
                [ meta, [ pair1, pair2 ].flatten() ]
            }
            .dump(tag: "post-map")

        ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe
            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
    }

    ch_processed_reads = ch_adapterremoval_reads_prepped

    ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
    ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
    ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} )

    emit:
    reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
    versions = ch_versions          // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}
|
|
@ -14,15 +14,11 @@ workflow SHORTREAD_FASTP {
|
||||||
ch_multiqc_files = Channel.empty()
|
ch_multiqc_files = Channel.empty()
|
||||||
|
|
||||||
ch_input_for_fastp = reads
|
ch_input_for_fastp = reads
|
||||||
.dump(tag: "pre-fastp_branch")
|
|
||||||
.branch{
|
.branch{
|
||||||
single: it[0]['single_end'] == true
|
single: it[0]['single_end'] == true
|
||||||
paired: it[0]['single_end'] == false
|
paired: it[0]['single_end'] == false
|
||||||
}
|
}
|
||||||
|
|
||||||
ch_input_for_fastp.single.dump(tag: "input_fastp_single")
|
|
||||||
ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
|
|
||||||
|
|
||||||
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
||||||
// Last parameter here turns on merging of PE data
|
// Last parameter here turns on merging of PE data
|
||||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
|
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
|
||||||
|
@ -46,13 +42,11 @@ workflow SHORTREAD_FASTP {
|
||||||
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
|
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
|
||||||
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
|
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
|
||||||
|
|
||||||
ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped")
|
ch_processed_reads = ch_fastp_reads_prepped
|
||||||
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
|
||||||
|
|
||||||
ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final")
|
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||||
versions = ch_versions // channel: [ versions.yml ]
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
|
|
||||||
include { SHORTREAD_FASTP } from './shortread_fastp'
|
include { SHORTREAD_FASTP } from './shortread_fastp'
|
||||||
|
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
|
||||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
||||||
|
|
||||||
workflow SHORTREAD_PREPROCESSING {
|
workflow SHORTREAD_PREPROCESSING {
|
||||||
|
@ -18,6 +19,10 @@ workflow SHORTREAD_PREPROCESSING {
|
||||||
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
||||||
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
||||||
|
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
|
||||||
|
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
|
||||||
|
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
|
||||||
} else {
|
} else {
|
||||||
ch_processed_reads = reads
|
ch_processed_reads = reads
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue