1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 23:23:09 +00:00

fix: insert .fastq file extension

The MetaPhlAn3 module computes the input type from the file extension
without the possibility to configure an override. Since AdapterRemoval2
creates files without `.fastq` extensions, MetaPhlAn3 input was determined
to be SAM.
This commit is contained in:
Moritz E. Beber 2022-04-02 21:50:13 +02:00
parent 4eab257d68
commit be93eae640
2 changed files with 118 additions and 39 deletions

View file

@ -0,0 +1,31 @@
process ENSURE_FASTQ_EXTENSION {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::bash=5.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
'biocontainers/biocontainers:v1.2.0_cv2' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path('*.fastq.gz'), emit: reads
script:
if (meta.single_end) {
fastq = "${reads.baseName}.fastq.gz"
"""
ln -s '${reads}' '${fastq}'
"""
} else {
first = "${reads[0].baseName}.fastq.gz"
second = "${reads[1].baseName}.fastq.gz"
"""
ln -s '${reads[0]}' '${first}'
ln -s '${reads[1]}' '${second}'
"""
}
}

View file

@ -5,6 +5,11 @@ Process short raw reads with AdapterRemoval
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
include {
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
} from '../../modules/local/ensure_fastq_extension'
workflow SHORTREAD_ADAPTERREMOVAL { workflow SHORTREAD_ADAPTERREMOVAL {
@ -30,59 +35,102 @@ workflow SHORTREAD_ADAPTERREMOVAL {
*/ */
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
.mix( ENSURE_FASTQ_EXTENSION1(
Channel.empty().mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
ADAPTERREMOVAL_PAIRED.out.singles_truncated, ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.pair1_truncated, ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
ADAPTERREMOVAL_PAIRED.out.pair2_truncated ADAPTERREMOVAL_PAIRED.out.pair2_truncated
) )
.map { .map { meta, reads ->
meta, reads -> meta.single_end = true
def meta_new = meta.clone() [meta, reads]
meta_new.single_end = true
[ meta_new, reads ]
} }
.groupTuple() )
ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads CAT_FASTQ(
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
.mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = true
[ meta_new, reads ] ENSURE_FASTQ_EXTENSION1(
Channel.empty().mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
)
.map { meta, reads ->
meta.single_end = true
[meta, reads]
} }
.groupTuple(by: 0) )
ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads CAT_FASTQ(
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
} else { } else {
ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.pair1_truncated ENSURE_FASTQ_EXTENSION1(
.join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated ) ADAPTERREMOVAL_PAIRED.out.pair1_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION2(
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
.join(ENSURE_FASTQ_EXTENSION2.out.reads)
.groupTuple() .groupTuple()
.map { meta, pair1, pair2 -> .map { meta, pair1, pair2 ->
meta.single_end = false
[ meta, [ pair1, pair2 ].flatten() ] [ meta, [ pair1, pair2 ].flatten() ]
} }
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) .mix(ENSURE_FASTQ_EXTENSION3.out.reads)
}
ch_processed_reads = ch_adapterremoval_reads_prepped }
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} ) ch_multiqc_files = ch_multiqc_files.mix(
ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
)
emit: emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] reads = ch_adapterremoval_reads_prepped // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ] versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files mqc = ch_multiqc_files
} }
def ensureFastQExtension(row) {
def (meta, read) = row
def filename = file(read.parent).resolve("${read.baseName}.fastq.gz")
read.renameTo(filename.toString())
return [meta, read]
}