1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 23:19:54 +00:00

Merge pull request #42 from nf-core/refactor-adapterremoval

Refactor adapterremoval
This commit is contained in:
Moritz E. Beber 2022-04-05 12:04:36 +02:00 committed by GitHub
commit b37a9c5576
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 59 additions and 78 deletions

View file

@ -4,7 +4,7 @@
"repos": { "repos": {
"nf-core/modules": { "nf-core/modules": {
"adapterremoval": { "adapterremoval": {
"git_sha": "f0800157544a82ae222931764483331a81812012" "git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a"
}, },
"cat/fastq": { "cat/fastq": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"

View file

@ -12,14 +12,13 @@ process ADAPTERREMOVAL {
path(adapterlist) path(adapterlist)
output: output:
tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated
tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved tuple val(meta), path('*.settings') , emit: settings
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -38,10 +37,19 @@ process ADAPTERREMOVAL {
$adapterlist \\ $adapterlist \\
--basename ${prefix} \\ --basename ${prefix} \\
--threads ${task.cpus} \\ --threads ${task.cpus} \\
--settings ${prefix}.log \\
--seed 42 \\ --seed 42 \\
--gzip --gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
@ -56,10 +64,24 @@ process ADAPTERREMOVAL {
$adapterlist \\ $adapterlist \\
--basename ${prefix} \\ --basename ${prefix} \\
--threads $task.cpus \\ --threads $task.cpus \\
--settings ${prefix}.log \\
--seed 42 \\ --seed 42 \\
--gzip --gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
ensure_fastq '${prefix}.pair1.truncated.gz'
ensure_fastq '${prefix}.pair2.truncated.gz'
ensure_fastq '${prefix}.collapsed.gz'
ensure_fastq '${prefix}.collapsed.truncated.gz'
ensure_fastq '${prefix}.paired.gz'
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")

View file

@ -43,43 +43,43 @@ output:
Adapter trimmed FastQ files of either single-end reads, or singleton Adapter trimmed FastQ files of either single-end reads, or singleton
'orphaned' reads from merging of paired-end data (i.e., one of the pair 'orphaned' reads from merging of paired-end data (i.e., one of the pair
was lost due to filtering thresholds). was lost due to filtering thresholds).
pattern: "*.truncated.gz" pattern: "*.truncated.fastq.gz"
- discarded: - discarded:
type: file type: file
description: | description: |
Adapter trimmed FastQ files of reads that did not pass filtering Adapter trimmed FastQ files of reads that did not pass filtering
thresholds. thresholds.
pattern: "*.discarded.gz" pattern: "*.discarded.fastq.gz"
- pair1_truncated: - pair1_truncated:
type: file type: file
description: | description: |
Adapter trimmed R1 FastQ files of paired-end reads that did not merge Adapter trimmed R1 FastQ files of paired-end reads that did not merge
with their respective R2 pair due to long templates. The respective pair with their respective R2 pair due to long templates. The respective pair
is stored in 'pair2_truncated'. is stored in 'pair2_truncated'.
pattern: "*.pair1.truncated.gz" pattern: "*.pair1.truncated.fastq.gz"
- pair2_truncated: - pair2_truncated:
type: file type: file
description: | description: |
Adapter trimmed R2 FastQ files of paired-end reads that did not merge Adapter trimmed R2 FastQ files of paired-end reads that did not merge
with their respective R1 pair due to long templates. The respective pair with their respective R1 pair due to long templates. The respective pair
is stored in 'pair1_truncated'. is stored in 'pair1_truncated'.
pattern: "*.pair2.truncated.gz" pattern: "*.pair2.truncated.fastq.gz"
- collapsed: - collapsed:
type: file type: file
description: | description: |
Collapsed FastQ of paired-end reads that successfully merged with their Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair but were not trimmed. respective R1 pair but were not trimmed.
pattern: "*.collapsed.gz" pattern: "*.collapsed.fastq.gz"
- collapsed_truncated: - collapsed_truncated:
type: file type: file
description: | description: |
Collapsed FastQ of paired-end reads that successfully merged with their Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair and were trimmed of adapter due to sufficient overlap. respective R1 pair and were trimmed of adapter due to sufficient overlap.
pattern: "*.collapsed.truncated.gz" pattern: "*.collapsed.truncated.fastq.gz"
- log: - log:
type: file type: file
description: AdapterRemoval log file description: AdapterRemoval log file
pattern: "*.log" pattern: "*.settings"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions

View file

@ -5,11 +5,6 @@ Process short raw reads with AdapterRemoval
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
include {
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
} from '../../modules/local/ensure_fastq_extension'
workflow SHORTREAD_ADAPTERREMOVAL { workflow SHORTREAD_ADAPTERREMOVAL {
@ -36,34 +31,27 @@ workflow SHORTREAD_ADAPTERREMOVAL {
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1( ch_concat_fastq = Channel.empty()
Channel.empty().mix( .mix(
ADAPTERREMOVAL_PAIRED.out.collapsed, ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
ADAPTERREMOVAL_PAIRED.out.singles_truncated, ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.pair1_truncated, ADAPTERREMOVAL_PAIRED.out.paired_truncated
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
) )
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple() .groupTuple()
) // Paired-end reads cause a nested tuple during grouping.
// We want to present a flat list of files to `CAT_FASTQ`.
.map { meta, fastq -> [meta, fastq.flatten()] }
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads) .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1( ch_concat_fastq = Channel.empty()
Channel.empty().mix( .mix(
ADAPTERREMOVAL_PAIRED.out.collapsed, ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
) )
@ -71,54 +59,25 @@ workflow SHORTREAD_ADAPTERREMOVAL {
meta.single_end = true meta.single_end = true
[meta, reads] [meta, reads]
} }
)
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple() .groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads) .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else { } else {
ENSURE_FASTQ_EXTENSION1( ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated
ADAPTERREMOVAL_PAIRED.out.pair1_truncated .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION2(
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
.join(ENSURE_FASTQ_EXTENSION2.out.reads)
.groupTuple()
.map { meta, pair1, pair2 ->
meta.single_end = false
[ meta, [ pair1, pair2 ].flatten() ]
}
.mix(ENSURE_FASTQ_EXTENSION3.out.reads)
} }
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( ch_multiqc_files = ch_multiqc_files.mix(
ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_PAIRED.out.settings.collect{it[1]},
ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} ADAPTERREMOVAL_SINGLE.out.settings.collect{it[1]}
) )
emit: emit: