1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 05:52:04 +00:00

Merge pull request #42 from nf-core/refactor-adapterremoval

Refactor adapterremoval
This commit is contained in:
Moritz E. Beber 2022-04-05 12:04:36 +02:00 committed by GitHub
commit b37a9c5576
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 59 additions and 78 deletions

View file

@ -4,7 +4,7 @@
"repos": {
"nf-core/modules": {
"adapterremoval": {
"git_sha": "f0800157544a82ae222931764483331a81812012"
"git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a"
},
"cat/fastq": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"

View file

@ -12,15 +12,14 @@ process ADAPTERREMOVAL {
path(adapterlist)
output:
tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated
tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated
tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated
tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path('*.settings') , emit: settings
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@ -38,10 +37,19 @@ process ADAPTERREMOVAL {
$adapterlist \\
--basename ${prefix} \\
--threads ${task.cpus} \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
@ -56,10 +64,24 @@ process ADAPTERREMOVAL {
$adapterlist \\
--basename ${prefix} \\
--threads $task.cpus \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
ensure_fastq '${prefix}.pair1.truncated.gz'
ensure_fastq '${prefix}.pair2.truncated.gz'
ensure_fastq '${prefix}.collapsed.gz'
ensure_fastq '${prefix}.collapsed.truncated.gz'
ensure_fastq '${prefix}.paired.gz'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")

View file

@ -43,43 +43,43 @@ output:
Adapter trimmed FastQ files of either single-end reads, or singleton
'orphaned' reads from merging of paired-end data (i.e., one of the pair
was lost due to filtering thresholds).
pattern: "*.truncated.gz"
pattern: "*.truncated.fastq.gz"
- discarded:
type: file
description: |
Adapter trimmed FastQ files of reads that did not pass filtering
thresholds.
pattern: "*.discarded.gz"
pattern: "*.discarded.fastq.gz"
- pair1_truncated:
type: file
description: |
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
with their respective R2 pair due to long templates. The respective pair
is stored in 'pair2_truncated'.
pattern: "*.pair1.truncated.gz"
pattern: "*.pair1.truncated.fastq.gz"
- pair2_truncated:
type: file
description: |
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
with their respective R1 pair due to long templates. The respective pair
is stored in 'pair1_truncated'.
pattern: "*.pair2.truncated.gz"
pattern: "*.pair2.truncated.fastq.gz"
- collapsed:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective mate (R1 with R2) but were not trimmed.
pattern: "*.collapsed.gz"
pattern: "*.collapsed.fastq.gz"
- collapsed_truncated:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective mate (R1 with R2) and were trimmed of adapter due to sufficient overlap.
pattern: "*.collapsed.truncated.gz"
pattern: "*.collapsed.truncated.fastq.gz"
- log:
type: file
description: AdapterRemoval log file
pattern: "*.log"
pattern: "*.settings"
- versions:
type: file
description: File containing software versions

View file

@ -5,11 +5,6 @@ Process short raw reads with AdapterRemoval
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
include {
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
} from '../../modules/local/ensure_fastq_extension'
workflow SHORTREAD_ADAPTERREMOVAL {
@ -36,34 +31,27 @@ workflow SHORTREAD_ADAPTERREMOVAL {
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1(
Channel.empty().mix(
ch_concat_fastq = Channel.empty()
.mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
ADAPTERREMOVAL_PAIRED.out.paired_truncated
)
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
.groupTuple()
// Paired-end reads cause a nested tuple during grouping.
// We want to present a flat list of files to `CAT_FASTQ`.
.map { meta, fastq -> [meta, fastq.flatten()] }
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1(
Channel.empty().mix(
ch_concat_fastq = Channel.empty()
.mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
)
@ -71,54 +59,25 @@ workflow SHORTREAD_ADAPTERREMOVAL {
meta.single_end = true
[meta, reads]
}
)
.groupTuple()
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else {
ENSURE_FASTQ_EXTENSION1(
ADAPTERREMOVAL_PAIRED.out.pair1_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION2(
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
.join(ENSURE_FASTQ_EXTENSION2.out.reads)
.groupTuple()
.map { meta, pair1, pair2 ->
meta.single_end = false
[ meta, [ pair1, pair2 ].flatten() ]
}
.mix(ENSURE_FASTQ_EXTENSION3.out.reads)
ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
}
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix(
ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
ADAPTERREMOVAL_PAIRED.out.settings.collect{it[1]},
ADAPTERREMOVAL_SINGLE.out.settings.collect{it[1]}
)
emit: