Untar unzip meta (#1408)

* fix: remove left-over unnecessary code

* Adds support for meta lists for unzip and untar

* Fix test inputs

* Update all modules to support extraction of decompressed file from untar/unzip new meta + file tuple

* Update all modules to support extraction of decompressed file from untar/unzip new meta + file tuple

* Fix MALTEXTRACT/AMPS

* Fix further modules

* Fix cellranger

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
James A. Fellows Yates 2022-03-21 14:49:28 +01:00 committed by GitHub
parent 8c00897853
commit e080f4c8ac
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 121 additions and 92 deletions

View file

@ -8,19 +8,19 @@ process UNTAR {
'biocontainers/biocontainers:v1.2.0_cv1' }" 'biocontainers/biocontainers:v1.2.0_cv1' }"
input: input:
path archive tuple val(meta), path(archive)
output: output:
path "$untar" , emit: untar tuple val(meta), path("$untar"), emit: untar
path "versions.yml", emit: versions path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: '' def args2 = task.ext.args2 ?: ''
untar = archive.toString() - '.tar.gz' untar = archive.toString() - '.tar.gz'
""" """
tar \\ tar \\
-xzvf \\ -xzvf \\

View file

@ -10,11 +10,21 @@ tools:
documentation: https://www.gnu.org/software/tar/manual/ documentation: https://www.gnu.org/software/tar/manual/
licence: ["GPL-3.0-or-later"] licence: ["GPL-3.0-or-later"]
input: input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- archive: - archive:
type: file type: file
description: File to be untarred description: File to be untarred
pattern: "*.{tar}.{gz}" pattern: "*.{tar}.{gz}"
output: output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- untar: - untar:
type: file type: file
description: description:

View file

@ -8,11 +8,11 @@ process UNZIP {
'quay.io/biocontainers/p7zip:15.09--h2d50403_4' }" 'quay.io/biocontainers/p7zip:15.09--h2d50403_4' }"
input: input:
path archive tuple val(meta), path(archive)
output: output:
path "${archive.baseName}/", emit: unzipped_archive tuple val(meta), path("${archive.baseName}/"), emit: unzipped_archive
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when

View file

@ -12,12 +12,22 @@ tools:
licence: ["LGPL-2.1-or-later"] licence: ["LGPL-2.1-or-later"]
input: input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- archive: - archive:
type: file type: file
description: ZIP file description: ZIP file
pattern: "*.zip" pattern: "*.zip"
output: output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- unzipped_archive: - unzipped_archive:
type: directory type: directory
description: Directory contents of the unzipped archive description: Directory contents of the unzipped archive

View file

@ -15,18 +15,21 @@ workflow test_amps {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
gff = [] gff = []
seq_type = "DNA" seq_type = "DNA"
map_db = file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
input = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
]
mode = "BlastN" mode = "BlastN"
taxon_list = file(params.test_data['sarscov2']['genome']['taxon_list_txt'], checkIfExists: true) taxon_list = file(params.test_data['sarscov2']['genome']['taxon_list_txt'], checkIfExists: true)
ncbi_dir = file(params.test_data['sarscov2']['genome']['ncbi_taxmap_zip'], checkIfExists: true) ncbi_dir = [ [], file(params.test_data['sarscov2']['genome']['ncbi_taxmap_zip'], checkIfExists: true) ]
filter = "def_anc"
UNZIP_MALT ( map_db ) UNZIP_MALT ( map_db )
UNZIP_MALTEXTRACT ( ncbi_dir ) UNZIP_MALTEXTRACT ( ncbi_dir )
MALT_BUILD ( fastas, seq_type, gff, UNZIP_MALT.out.unzipped_archive ) MALT_BUILD ( fastas, seq_type, gff, UNZIP_MALT.out.unzipped_archive.map{ it[1] } )
MALT_RUN ( input, mode, MALT_BUILD.out.index ) MALT_RUN ( input, mode, MALT_BUILD.out.index )
MALTEXTRACT ( MALT_RUN.out.rma6, taxon_list, UNZIP_MALTEXTRACT.out.unzipped_archive) ch_input_to_maltextract = MALT_RUN.out.rma6.map{ it[1] }
MALTEXTRACT ( ch_input_to_maltextract, taxon_list, UNZIP_MALTEXTRACT.out.unzipped_archive.map{ it[1] })
AMPS ( MALTEXTRACT.out.results, taxon_list, filter ) AMPS ( MALTEXTRACT.out.results, taxon_list, filter )
} }

View file

@ -11,12 +11,12 @@ workflow test_artic_minion {
[ id:'test', single_end:false ], // meta map [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)
] ]
fast5_tar = file(params.test_data['sarscov2']['nanopore']['fast5_tar_gz'], checkIfExists: true) fast5_tar = [ [], file(params.test_data['sarscov2']['nanopore']['fast5_tar_gz'], checkIfExists: true) ]
sequencing_summary = file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true) sequencing_summary = file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true)
fasta = file('https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.reference.fasta', checkIfExists: true) fasta = file('https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.reference.fasta', checkIfExists: true)
bed = file('https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.primer.bed', checkIfExists: true) bed = file('https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.primer.bed', checkIfExists: true)
fast5_dir = UNTAR ( fast5_tar ).untar fast5_dir = UNTAR ( fast5_tar ).untar.map{ it[1] }
ARTIC_MINION ( input, fast5_dir, sequencing_summary, fasta, bed, [], '', 'nCoV-2019', '3') ARTIC_MINION ( input, fast5_dir, sequencing_summary, fasta, bed, [], '', 'nCoV-2019', '3')
} }

View file

@ -8,19 +8,19 @@ include { CELLRANGER_MKFASTQ } from '../../../../modules/cellranger/mkfastq/main
workflow test_cellranger_mkfastq_simple { workflow test_cellranger_mkfastq_simple {
simple_csv = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-simple-1.2.0.csv", checkIfExists: true) simple_csv = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-simple-1.2.0.csv", checkIfExists: true)
tiny_bcl = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-1.2.0.tar.gz", checkIfExists: true) tiny_bcl = [ [], file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-1.2.0.tar.gz", checkIfExists: true) ]
UNTAR ( tiny_bcl ) UNTAR ( tiny_bcl )
CELLRANGER_MKFASTQ ( UNTAR.out.untar, simple_csv) CELLRANGER_MKFASTQ ( UNTAR.out.untar.map{ it[1] }, simple_csv)
} }
workflow test_cellranger_mkfastq_illumina { workflow test_cellranger_mkfastq_illumina {
samplesheet_csv = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-samplesheet-1.2.0.csv", checkIfExists: true) samplesheet_csv = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-samplesheet-1.2.0.csv", checkIfExists: true)
tiny_bcl = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-1.2.0.tar.gz", checkIfExists: true) tiny_bcl = [ [], file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-1.2.0.tar.gz", checkIfExists: true) ]
UNTAR ( tiny_bcl ) UNTAR ( tiny_bcl )
CELLRANGER_MKFASTQ ( UNTAR.out.untar, samplesheet_csv) CELLRANGER_MKFASTQ ( UNTAR.out.untar.map{ it[1] }, samplesheet_csv)
} }

View file

@ -19,7 +19,7 @@ workflow test_controlfreec {
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)
chrfiles = file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true) chrfiles = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true) ]
target_bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) target_bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
UNTAR(chrfiles) UNTAR(chrfiles)
@ -29,7 +29,7 @@ workflow test_controlfreec {
[], [],
dbsnp, dbsnp,
dbsnp_tbi, dbsnp_tbi,
UNTAR.out.untar, UNTAR.out.untar.map{ it[1] },
[], [],
target_bed, target_bed,
[] []

View file

@ -6,12 +6,12 @@ include { UNTAR } from '../../../../modules/untar/main.nf'
include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../../modules/gatk4/createsomaticpanelofnormals/main.nf' include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../../modules/gatk4/createsomaticpanelofnormals/main.nf'
workflow test_gatk4_createsomaticpanelofnormals { workflow test_gatk4_createsomaticpanelofnormals {
db = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) db = [[], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
input = Channel.of([ id:'test']) input = Channel.of([ id:'test'])
.combine(UNTAR.out.untar) .combine(UNTAR.out.untar.map{ it[1] })
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)

View file

@ -22,7 +22,7 @@ workflow test_gatk4_genomicsdbimport_create_genomicsdb {
} }
workflow test_gatk4_genomicsdbimport_get_intervalslist { workflow test_gatk4_genomicsdbimport_get_intervalslist {
db = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) db = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
@ -31,7 +31,7 @@ workflow test_gatk4_genomicsdbimport_get_intervalslist {
[] , [] ,
[] , [] ,
[] ]) [] ])
.combine(UNTAR.out.untar) .combine(UNTAR.out.untar.map{ it[1] })
run_intlist = true run_intlist = true
run_updatewspace = false run_updatewspace = false
@ -41,7 +41,7 @@ workflow test_gatk4_genomicsdbimport_get_intervalslist {
} }
workflow test_gatk4_genomicsdbimport_update_genomicsdb { workflow test_gatk4_genomicsdbimport_update_genomicsdb {
db = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) db = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
@ -50,7 +50,7 @@ workflow test_gatk4_genomicsdbimport_update_genomicsdb {
file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'] , checkIfExists: true) , file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'] , checkIfExists: true) ,
[] , [] ,
[] ]) [] ])
.combine(UNTAR.out.untar) .combine(UNTAR.out.untar.map{ it[1] })
run_intlist = false run_intlist = false
run_updatewspace = true run_updatewspace = true

View file

@ -97,10 +97,10 @@ workflow test_gatk4_genotypegvcfs_gendb_input {
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
UNTAR ( test_genomicsdb ) UNTAR ( test_genomicsdb )
gendb = UNTAR.out.untar.collect() gendb = UNTAR.out.untar.map{ it[1] }.collect()
gendb.add([]) gendb.add([])
gendb.add([]) gendb.add([])
@ -119,10 +119,10 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp {
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
UNTAR ( test_genomicsdb ) UNTAR ( test_genomicsdb )
gendb = UNTAR.out.untar.collect() gendb = UNTAR.out.untar.map{ it[1] }.collect()
gendb.add([]) gendb.add([])
gendb.add([]) gendb.add([])
input = Channel.of([ id:'test' ]).combine(gendb) input = Channel.of([ id:'test' ]).combine(gendb)
@ -137,10 +137,10 @@ workflow test_gatk4_genotypegvcfs_gendb_input_intervals {
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
UNTAR ( test_genomicsdb ) UNTAR ( test_genomicsdb )
gendb = UNTAR.out.untar.collect() gendb = UNTAR.out.untar.map{ it[1] }.collect()
gendb.add([]) gendb.add([])
gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)]) gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)])
input = Channel.of([ id:'test' ]).combine(gendb) input = Channel.of([ id:'test' ]).combine(gendb)
@ -158,10 +158,10 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals {
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
UNTAR ( test_genomicsdb ) UNTAR ( test_genomicsdb )
gendb = UNTAR.out.untar.collect() gendb = UNTAR.out.untar.map{ it[1] }.collect()
gendb.add([]) gendb.add([])
gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)]) gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)])
input = Channel.of([ id:'test' ]).combine(gendb) input = Channel.of([ id:'test' ]).combine(gendb)

View file

@ -9,10 +9,10 @@ workflow test_kraken2_kraken2_single_end {
input = [ [ id:'test', single_end:true ], // meta map input = [ [ id:'test', single_end:true ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
] ]
db = file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar ) KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
} }
workflow test_kraken2_kraken2_paired_end { workflow test_kraken2_kraken2_paired_end {
@ -20,8 +20,8 @@ workflow test_kraken2_kraken2_paired_end {
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
] ]
db = file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar ) KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
} }

View file

@ -10,10 +10,10 @@ workflow test_last_lastal_with_dummy_param_file {
input = [ [ id:'contigs', single_end:false ], // meta map input = [ [ id:'contigs', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true),
[] ] [] ]
db = [ file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) ] db = [ [], file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
LAST_LASTAL ( input, UNTAR.out.untar) LAST_LASTAL ( input, UNTAR.out.untar.map{ it[1] })
} }
workflow test_last_lastal_with_real_param_file { workflow test_last_lastal_with_real_param_file {
@ -21,8 +21,8 @@ workflow test_last_lastal_with_real_param_file {
input = [ [ id:'contigs', single_end:false ], // meta map input = [ [ id:'contigs', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['contigs_genome_par'], checkIfExists: true) ] file(params.test_data['sarscov2']['genome']['contigs_genome_par'], checkIfExists: true) ]
db = [ file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) ] db = [ [], file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
LAST_LASTAL ( input, UNTAR.out.untar) LAST_LASTAL ( input, UNTAR.out.untar.map{ it[1] })
} }

View file

@ -7,9 +7,9 @@ include { LAST_TRAIN } from '../../../../modules/last/train/main.nf'
workflow test_last_train { workflow test_last_train {
db = [ file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) ] db = [ [], file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) ]
input = [ [ id:'contigs' ], // meta map input = [ [ id:'contigs' ], // meta map
file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
LAST_TRAIN ( input, UNTAR.out.untar ) LAST_TRAIN ( input, UNTAR.out.untar.map{ it[1] } )
} }

View file

@ -9,18 +9,18 @@ workflow test_malt_build {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
seq_type = "DNA" seq_type = "DNA"
gff = [] gff = []
map_db = file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
UNZIP ( map_db ) UNZIP ( map_db )
MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive ) MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive.map{ it[1] } )
} }
workflow test_malt_build_gff { workflow test_malt_build_gff {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
seq_type = "DNA" seq_type = "DNA"
gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
map_db = file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
UNZIP ( map_db ) UNZIP ( map_db )
MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive ) MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive.map{ it[1] } )
} }

View file

@ -2,16 +2,16 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { UNZIP } from '../../../../modules/unzip/main.nf' include { UNZIP } from '../../../../modules/unzip/main.nf'
include { MALT_BUILD } from '../../../../modules/malt/build/main.nf' include { MALT_BUILD } from '../../../../modules/malt/build/main.nf'
include { MALT_RUN } from '../../../../modules/malt/run/main.nf' include { MALT_RUN } from '../../../../modules/malt/run/main.nf'
workflow test_malt_run { workflow test_malt_run {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
seq_type = "DNA" seq_type = "DNA"
map_db = file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
@ -19,7 +19,7 @@ workflow test_malt_run {
mode = "BlastN" mode = "BlastN"
UNZIP ( map_db ) UNZIP ( map_db )
MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive ) MALT_BUILD ( fastas, seq_type, gff, UNZIP.out.unzipped_archive.map { it[1] } )
MALT_RUN ( input, mode, MALT_BUILD.out.index ) MALT_RUN ( input, mode, MALT_BUILD.out.index )
} }

View file

@ -4,24 +4,28 @@ nextflow.enable.dsl = 2
include { UNZIP as UNZIP_MALT } from '../../../modules/unzip/main.nf' include { UNZIP as UNZIP_MALT } from '../../../modules/unzip/main.nf'
include { UNZIP as UNZIP_MALTEXTRACT } from '../../../modules/unzip/main.nf' include { UNZIP as UNZIP_MALTEXTRACT } from '../../../modules/unzip/main.nf'
include { MALT_BUILD } from '../../../modules/malt/build/main.nf' include { MALT_BUILD } from '../../../modules/malt/build/main.nf'
include { MALT_RUN } from '../../../modules/malt/run/main.nf' include { MALT_RUN } from '../../../modules/malt/run/main.nf'
include { MALTEXTRACT } from '../../../modules/maltextract/main.nf' include { MALTEXTRACT } from '../../../modules/maltextract/main.nf'
workflow test_maltextract { workflow test_maltextract {
fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fastas = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
gff = [] gff = []
seq_type = "DNA" seq_type = "DNA"
map_db = file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) map_db = [ [], file("https://software-ab.informatik.uni-tuebingen.de/download/megan6/megan-nucl-Jan2021.db.zip", checkIfExists: true) ]
input = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
]
mode = "BlastN" mode = "BlastN"
taxon_list = file(params.test_data['sarscov2']['genome']['taxon_list_txt'], checkIfExists: true) taxon_list = file(params.test_data['sarscov2']['genome']['taxon_list_txt'], checkIfExists: true)
ncbi_dir = file(params.test_data['sarscov2']['genome']['ncbi_taxmap_zip'], checkIfExists: true) ncbi_dir = [ [], file(params.test_data['sarscov2']['genome']['ncbi_taxmap_zip'], checkIfExists: true) ]
UNZIP_MALT ( map_db ) UNZIP_MALT ( map_db )
UNZIP_MALTEXTRACT ( ncbi_dir ) UNZIP_MALTEXTRACT ( ncbi_dir )
MALT_BUILD ( fastas, seq_type, gff, UNZIP_MALT.out.unzipped_archive ) MALT_BUILD ( fastas, seq_type, gff, UNZIP_MALT.out.unzipped_archive.map{ it[1] } )
MALT_RUN ( input, mode, MALT_BUILD.out.index ) MALT_RUN ( input, mode, MALT_BUILD.out.index )
MALTEXTRACT ( MALT_RUN.out.rma6, taxon_list, UNZIP_MALTEXTRACT.out.unzipped_archive) ch_input_to_maltextract = MALT_RUN.out.rma6.map{ it[1] }
MALTEXTRACT ( ch_input_to_maltextract, taxon_list, UNZIP_MALTEXTRACT.out.unzipped_archive.map{ it[1] })
} }

View file

@ -3,8 +3,6 @@
tags: tags:
- maltextract - maltextract
files: files:
- path: output/maltextract/results/error.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/maltextract/results/error.txt - path: output/maltextract/results/error.txt
- path: output/maltextract/results/log.txt - path: output/maltextract/results/log.txt
contains: contains:

View file

@ -12,10 +12,10 @@ workflow test_metaphlan3_single_end {
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
] ]
db = channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', type: 'dir', checkIfExists: true) db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
METAPHLAN3 ( input, UNTAR.out.untar ) METAPHLAN3 ( input, UNTAR.out.untar.map{ it[1] } )
} }
workflow test_metaphlan3_paired_end { workflow test_metaphlan3_paired_end {
@ -25,11 +25,10 @@ workflow test_metaphlan3_paired_end {
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
] ]
db = channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', type: 'dir', checkIfExists: true) db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
METAPHLAN3 ( input, UNTAR.out.untar ) METAPHLAN3 ( input, UNTAR.out.untar.map{ it[1] } )
} }
workflow test_metaphlan3_sam { workflow test_metaphlan3_sam {
@ -38,12 +37,11 @@ workflow test_metaphlan3_sam {
[ file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) ] [ file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) ]
] ]
db = channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', type: 'dir', checkIfExists: true) db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
SAMTOOLS_VIEW ( input, [] ) SAMTOOLS_VIEW ( input, [] )
METAPHLAN3 ( SAMTOOLS_VIEW.out.bam, UNTAR.out.untar ) METAPHLAN3 ( SAMTOOLS_VIEW.out.bam, UNTAR.out.untar.map{ it[1] } )
} }
workflow test_metaphlan3_fasta { workflow test_metaphlan3_fasta {
@ -52,8 +50,8 @@ workflow test_metaphlan3_fasta {
[ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
] ]
db = channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', type: 'dir', checkIfExists: true) db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
METAPHLAN3 ( input, UNTAR.out.untar ) METAPHLAN3 ( input, UNTAR.out.untar.map{ it[1] } )
} }

View file

@ -7,22 +7,22 @@ include { SRATOOLS_FASTERQDUMP } from '../../../../modules/sratools/fasterqdump/
workflow test_sratools_fasterqdump_single_end { workflow test_sratools_fasterqdump_single_end {
archive = file(params.test_data['sarscov2']['illumina']['SRR13255544_tar_gz'], checkIfExists: true) archive = [ [], file(params.test_data['sarscov2']['illumina']['SRR13255544_tar_gz'], checkIfExists: true) ]
UNTAR ( archive ) UNTAR ( archive )
def input = Channel.of([ id:'test_single_end', single_end:true ]) def input = Channel.of([ id:'test_single_end', single_end:true ])
.combine(UNTAR.out.untar) .combine(UNTAR.out.untar.map{ it[1] })
SRATOOLS_FASTERQDUMP ( input ) SRATOOLS_FASTERQDUMP ( input )
} }
workflow test_sratools_fasterqdump_paired_end { workflow test_sratools_fasterqdump_paired_end {
archive = file(params.test_data['sarscov2']['illumina']['SRR11140744_tar_gz'], checkIfExists: true) archive = [ [], file(params.test_data['sarscov2']['illumina']['SRR11140744_tar_gz'], checkIfExists: true) ]
UNTAR ( archive ) UNTAR ( archive )
def input = Channel.of([ id:'test_paired_end', single_end:false ]) def input = Channel.of([ id:'test_paired_end', single_end:false ])
.combine(UNTAR.out.untar) .combine(UNTAR.out.untar.map{ it[1] })
SRATOOLS_FASTERQDUMP ( input ) SRATOOLS_FASTERQDUMP ( input )
} }

View file

@ -5,7 +5,10 @@ nextflow.enable.dsl = 2
include { UNTAR } from '../../../modules/untar/main.nf' include { UNTAR } from '../../../modules/untar/main.nf'
workflow test_untar { workflow test_untar {
input = file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) input = [
[],
file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true)
]
UNTAR ( input ) UNTAR ( input )
} }

View file

@ -6,7 +6,10 @@ include { UNZIP } from '../../../modules/unzip/main.nf'
workflow test_unzip { workflow test_unzip {
archive = file(params.test_data['sarscov2']['genome']['ncbi_taxmap_zip'], checkIfExists: true) archive = [
[],
file(params.test_data['sarscov2']['genome']['ncbi_taxmap_zip'], checkIfExists: true)
]
UNZIP ( archive ) UNZIP ( archive )
} }

View file

@ -6,36 +6,36 @@ include { UNTAR } from '../../../modules/untar/main.nf'
include { VCFANNO } from '../../../modules/vcfanno/main.nf' include { VCFANNO } from '../../../modules/vcfanno/main.nf'
workflow test_vcfanno { workflow test_vcfanno {
input = [ input = [
[ id:'test_compressed', single_end:false ], // meta map [ id:'test_compressed', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
] ]
input_2 = [ [ id:'test_compressed', single_end:false ], // meta map input_2 = [ [ id:'test_compressed', single_end:false ], // meta map
[] ] [] ]
toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true) toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true) resource_dir = [[], file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true) ]
UNTAR ( resource_dir ) UNTAR ( resource_dir )
VCFANNO ( input, input_2, toml, UNTAR.out.untar ) VCFANNO ( input, input_2, toml, UNTAR.out.untar.map{ it[1] } )
} }
workflow test_vcfanno_uncompressed { workflow test_vcfanno_uncompressed {
input = [ [ id:'test_uncompressed', single_end:false ], // meta map input = [ [ id:'test_uncompressed', single_end:false ], // meta map
[] ,[] ] [] ,[] ]
input_2 = [ input_2 = [
[ id:'test_uncompressed', single_end:false ], // meta map [ id:'test_uncompressed', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
] ]
toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true) toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true) resource_dir = [[], file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true) ]
UNTAR ( resource_dir ) UNTAR ( resource_dir )
VCFANNO ( input, input_2, toml, UNTAR.out.untar ) VCFANNO ( input, input_2, toml, UNTAR.out.untar.map{ it[1] } )
} }