1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-26 02:49:54 +00:00

Merge branch 'dev' into complexity-filter-bbduk

This commit is contained in:
James A. Fellows Yates 2022-04-05 13:11:15 +02:00 committed by GitHub
commit 23fb319779
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 76 deletions

View file

@ -4,7 +4,7 @@
"repos": { "repos": {
"nf-core/modules": { "nf-core/modules": {
"adapterremoval": { "adapterremoval": {
"git_sha": "f0800157544a82ae222931764483331a81812012" "git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a"
}, },
"bbmap/bbduk": { "bbmap/bbduk": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"

View file

@ -12,14 +12,13 @@ process ADAPTERREMOVAL {
path(adapterlist) path(adapterlist)
output: output:
tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated
tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved tuple val(meta), path('*.settings') , emit: settings
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -38,10 +37,19 @@ process ADAPTERREMOVAL {
$adapterlist \\ $adapterlist \\
--basename ${prefix} \\ --basename ${prefix} \\
--threads ${task.cpus} \\ --threads ${task.cpus} \\
--settings ${prefix}.log \\
--seed 42 \\ --seed 42 \\
--gzip --gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
@ -56,10 +64,24 @@ process ADAPTERREMOVAL {
$adapterlist \\ $adapterlist \\
--basename ${prefix} \\ --basename ${prefix} \\
--threads $task.cpus \\ --threads $task.cpus \\
--settings ${prefix}.log \\
--seed 42 \\ --seed 42 \\
--gzip --gzip
ensure_fastq() {
if [ -f "\${1}" ]; then
mv "\${1}" "\${1::-3}.fastq.gz"
fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
ensure_fastq '${prefix}.pair1.truncated.gz'
ensure_fastq '${prefix}.pair2.truncated.gz'
ensure_fastq '${prefix}.collapsed.gz'
ensure_fastq '${prefix}.collapsed.truncated.gz'
ensure_fastq '${prefix}.paired.gz'
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")

View file

@ -43,43 +43,43 @@ output:
Adapter trimmed FastQ files of either single-end reads, or singleton Adapter trimmed FastQ files of either single-end reads, or singleton
'orphaned' reads from merging of paired-end data (i.e., one of the pair 'orphaned' reads from merging of paired-end data (i.e., one of the pair
was lost due to filtering thresholds). was lost due to filtering thresholds).
pattern: "*.truncated.gz" pattern: "*.truncated.fastq.gz"
- discarded: - discarded:
type: file type: file
description: | description: |
Adapter trimmed FastQ files of reads that did not pass filtering Adapter trimmed FastQ files of reads that did not pass filtering
thresholds. thresholds.
pattern: "*.discarded.gz" pattern: "*.discarded.fastq.gz"
- pair1_truncated: - pair1_truncated:
type: file type: file
description: | description: |
Adapter trimmed R1 FastQ files of paired-end reads that did not merge Adapter trimmed R1 FastQ files of paired-end reads that did not merge
with their respective R2 pair due to long templates. The respective pair with their respective R2 pair due to long templates. The respective pair
is stored in 'pair2_truncated'. is stored in 'pair2_truncated'.
pattern: "*.pair1.truncated.gz" pattern: "*.pair1.truncated.fastq.gz"
- pair2_truncated: - pair2_truncated:
type: file type: file
description: | description: |
Adapter trimmed R2 FastQ files of paired-end reads that did not merge Adapter trimmed R2 FastQ files of paired-end reads that did not merge
with their respective R1 pair due to long templates. The respective pair with their respective R1 pair due to long templates. The respective pair
is stored in 'pair1_truncated'. is stored in 'pair1_truncated'.
pattern: "*.pair2.truncated.gz" pattern: "*.pair2.truncated.fastq.gz"
- collapsed: - collapsed:
type: file type: file
description: | description: |
Collapsed FastQ of paired-end reads that successfully merged with their Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair but were not trimmed. respective R1 pair but were not trimmed.
pattern: "*.collapsed.gz" pattern: "*.collapsed.fastq.gz"
- collapsed_truncated: - collapsed_truncated:
type: file type: file
description: | description: |
Collapsed FastQ of paired-end reads that successfully merged with their Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair and were trimmed of adapter due to sufficient overlap. respective R1 pair and were trimmed of adapter due to sufficient overlap.
pattern: "*.collapsed.truncated.gz" pattern: "*.collapsed.truncated.fastq.gz"
- log: - log:
type: file type: file
description: AdapterRemoval log file description: AdapterRemoval log file
pattern: "*.log" pattern: "*.settings"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions

View file

@ -5,11 +5,6 @@
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
include {
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
} from '../../modules/local/ensure_fastq_extension'
workflow SHORTREAD_ADAPTERREMOVAL { workflow SHORTREAD_ADAPTERREMOVAL {
@ -36,34 +31,27 @@ workflow SHORTREAD_ADAPTERREMOVAL {
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1( ch_concat_fastq = Channel.empty()
Channel.empty().mix( .mix(
ADAPTERREMOVAL_PAIRED.out.collapsed, ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
ADAPTERREMOVAL_PAIRED.out.singles_truncated, ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.pair1_truncated, ADAPTERREMOVAL_PAIRED.out.paired_truncated
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
) )
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple() .groupTuple()
) // Paired-end reads cause a nested tuple during grouping.
// We want to present a flat list of files to `CAT_FASTQ`.
.map { meta, fastq -> [meta, fastq.flatten()] }
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads) .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1( ch_concat_fastq = Channel.empty()
Channel.empty().mix( .mix(
ADAPTERREMOVAL_PAIRED.out.collapsed, ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
) )
@ -71,46 +59,17 @@ workflow SHORTREAD_ADAPTERREMOVAL {
meta.single_end = true meta.single_end = true
[meta, reads] [meta, reads]
} }
)
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple() .groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads) .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else { } else {
ENSURE_FASTQ_EXTENSION1( ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated
ADAPTERREMOVAL_PAIRED.out.pair1_truncated .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION2(
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
.join(ENSURE_FASTQ_EXTENSION2.out.reads)
.groupTuple()
.map { meta, pair1, pair2 ->
meta.single_end = false
[ meta, [ pair1, pair2 ].flatten() ]
}
.mix(ENSURE_FASTQ_EXTENSION3.out.reads)
} }