1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 21:53:09 +00:00

Start working on adding adapterremoval

This commit is contained in:
James Fellows Yates 2022-03-31 15:31:45 +02:00
parent 323883bd3e
commit c4c93bd59d
7 changed files with 281 additions and 14 deletions

View file

@ -3,6 +3,9 @@
"homePage": "https://github.com/nf-core/taxprofiler",
"repos": {
"nf-core/modules": {
"adapterremoval": {
"git_sha": "f0800157544a82ae222931764483331a81812012"
},
"cat/fastq": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
@ -32,4 +35,4 @@
}
}
}
}
}

View file

@ -0,0 +1,70 @@
process ADAPTERREMOVAL {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"
input:
tuple val(meta), path(reads)
path(adapterlist)
output:
tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated
tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated
tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
"""
AdapterRemoval \\
--file1 $reads \\
$args \\
$adapterlist \\
--basename ${prefix} \\
--threads ${task.cpus} \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
END_VERSIONS
"""
} else {
"""
AdapterRemoval \\
--file1 ${reads[0]} \\
--file2 ${reads[1]} \\
$args \\
$adapterlist \\
--basename ${prefix} \\
--threads $task.cpus \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
END_VERSIONS
"""
}
}

View file

@ -0,0 +1,90 @@
name: adapterremoval
description: Trim sequencing adapters and collapse overlapping reads
keywords:
- trimming
- adapters
- merging
- fastq
tools:
- adapterremoval:
description: The AdapterRemoval v2 tool for merging and clipping reads.
homepage: https://github.com/MikkelSchubert/adapterremoval
documentation: https://adapterremoval.readthedocs.io
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
- adapterlist:
type: file
description: Optional text file containing list of adapters to look for for removal
with one adapter per line. Otherwise will look for default adapters (see
AdapterRemoval man page), or can be modified to remove user-specified
adapters via ext.args.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- singles_truncated:
type: file
description: |
Adapter trimmed FastQ files of either single-end reads, or singleton
'orphaned' reads from merging of paired-end data (i.e., one of the pair
was lost due to filtering thresholds).
pattern: "*.truncated.gz"
- discarded:
type: file
description: |
Adapter trimmed FastQ files of reads that did not pass filtering
thresholds.
pattern: "*.discarded.gz"
- pair1_truncated:
type: file
description: |
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
with their respective R2 pair due to long templates. The respective pair
is stored in 'pair2_truncated'.
pattern: "*.pair1.truncated.gz"
- pair2_truncated:
type: file
description: |
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
with their respective R1 pair due to long templates. The respective pair
is stored in 'pair1_truncated'.
pattern: "*.pair2.truncated.gz"
- collapsed:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair but were not trimmed.
pattern: "*.collapsed.gz"
- collapsed_truncated:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair and were trimmed of adapter due to sufficient overlap.
pattern: "*.collapsed.truncated.gz"
- log:
type: file
description: AdapterRemoval log file
pattern: "*.log"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@maxibor"
- "@jfy133"

View file

@ -10,7 +10,10 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir"],
"required": [
"input",
"outdir"
],
"properties": {
"input": {
"type": "string",
@ -173,7 +176,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true
},
"email_on_fail": {
@ -284,7 +294,11 @@
},
"shortread_clipmerge_tool": {
"type": "string",
"default": "fastp"
"default": "fastp",
"enum": [
"fastp",
"adapterremoval"
]
},
"shortread_clipmerge_skipadaptertrim": {
"type": "boolean"
@ -294,15 +308,15 @@
},
"shortread_clipmerge_adapter1": {
"type": "string",
"default": null
"default": "None"
},
"shortread_clipmerge_adapter2": {
"type": "string",
"default": null
"default": "None"
},
"shortread_clipmerge_minlength": {
"type": "integer",
"default": 15
}
}
}
}

View file

@ -0,0 +1,91 @@
/*
Process short raw reads with AdapterRemoval
*/
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
workflow SHORTREAD_ADAPTERREMOVAL {
take:
reads // [[meta], [reads]]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_input_for_adapterremoval = reads
.dump(tag: "pre_adapterremoval_branch")
.branch{
single: it[0]['single_end'] == true
paired: it[0]['single_end'] == false
}
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
.mix(
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
)
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
}
.groupTuple(by: 0)
ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
.mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
}
.groupTuple(by: 0)
ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
} else {
ch_adapterremoval_reads_prepped_pe = ADAPTERREMOVAL_PAIRED.out.pair1_truncated
.join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated )
.dump(tag: "pre-group")
.groupTuple(by: 0)
.dump(tag: "post-group")
.map { meta, pair1, pair2 ->
[ meta, [ pair1, pair2 ].flatten() ]
}
.dump(tag: "post-map")
ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
}
ch_processed_reads = ch_adapterremoval_reads_prepped
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} )
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -14,15 +14,11 @@ workflow SHORTREAD_FASTP {
ch_multiqc_files = Channel.empty()
ch_input_for_fastp = reads
.dump(tag: "pre-fastp_branch")
.branch{
single: it[0]['single_end'] == true
paired: it[0]['single_end'] == false
}
ch_input_for_fastp.single.dump(tag: "input_fastp_single")
ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
// Last parameter here turns on merging of PE data
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
@ -46,13 +42,11 @@ workflow SHORTREAD_FASTP {
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped")
ch_processed_reads = ch_fastp_reads_prepped
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final")
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]

View file

@ -4,6 +4,7 @@
include { SHORTREAD_FASTP } from './shortread_fastp'
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
workflow SHORTREAD_PREPROCESSING {
@ -18,6 +19,10 @@ workflow SHORTREAD_PREPROCESSING {
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
} else {
ch_processed_reads = reads
}