Merge branch 'master' into tool/elprep-merge

This commit is contained in:
Matthias De Smet 2022-04-19 11:28:04 +02:00 committed by GitHub
commit 553c1bcf9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 206 additions and 73 deletions

View file

@ -32,8 +32,8 @@ input:
description: loci file <CHR><tab><POS1>
pattern: "*.{tsv}"
- fasta:
type: file
description: Input genome fasta file. Required when passing CRAM files.
type: file
description: Input genome fasta file. Required when passing CRAM files.
output:
- meta:

View file

@ -23,14 +23,12 @@ process BWA_MEM {
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
def samtools_command = sort_bam ? 'sort' : 'view'
"""
INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
bwa mem \\
$args \\
$read_group \\
-t $task.cpus \\
\$INDEX \\
$reads \\

View file

@ -23,7 +23,6 @@ process BWAMEM2_MEM {
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
def samtools_command = sort_bam ? 'sort' : 'view'
"""
INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
@ -31,7 +30,6 @@ process BWAMEM2_MEM {
bwa-mem2 \\
mem \\
$args \\
$read_group \\
-t $task.cpus \\
\$INDEX \\
$reads \\

View file

@ -0,0 +1,33 @@
// Builds a Kraken-style report from centrifuge classification results by
// running `centrifuge-kreport` against the centrifuge index found inside `db`.
process CENTRIFUGE_KREPORT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6':
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    // meta: sample map (meta.id is used for the tag and the default output prefix)
    // results: centrifuge classification output to summarise
    tuple val(meta), path(results)
    // db: location searched (following symlinks) for the `*.1.cf` index file
    path db

    output:
    tuple val(meta), path('*.txt') , emit: kreport
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options supplied through the module configuration
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The index name handed to -x is the path of the `*.1.cf` file with that suffix
    // removed. The sed pattern escapes the dots and is anchored to the end of the
    // line so that only the trailing `.1.cf` is stripped, never a look-alike
    // substring earlier in the path. `._*` files are excluded from the search.
    """
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1\\.cf\$//'`
    centrifuge-kreport \\
        $args \\
        -x \$db_name \\
        ${results} \\
        > ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,41 @@
# Module metadata for CENTRIFUGE_KREPORT (modules/centrifuge/kreport/main.nf).
name: "centrifuge_kreport"
description: Creates Kraken-style reports from centrifuge out files
keywords:
  - metagenomics
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - results:
      type: file
      description: File containing the centrifuge classification results
      pattern: "*.{txt}"
  # Second process input (`path db`); the module searches it for the `*.1.cf` index file.
  - db:
      type: directory
      description: Path to directory containing centrifuge database files

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - kreport:
      type: file
      description: |
        File containing kraken-style report from centrifuge
        out files.
      pattern: "*.{txt}"

authors:
  - "@sofstam"
  - "@jfy133"

View file

@ -24,44 +24,23 @@ process DRAGMAP_ALIGN {
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def read_group = meta.read_group ? "--RGSM ${meta.read_group}" : ""
def reads_command = meta.single_end ? "-1 $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
def samtools_command = sort_bam ? 'sort' : 'view'
if (meta.single_end) {
"""
dragen-os \\
-r $hashmap \\
$args \\
$read_group \\
--num-threads $task.cpus \\
-1 $reads \\
2> ${prefix}.dragmap.log \\
| samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dragmap: \$(echo \$(dragen-os --version 2>&1))
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
} else {
"""
dragen-os \\
-r $hashmap \\
$args \\
$read_group \\
--num-threads $task.cpus \\
-1 ${reads[0]} \\
-2 ${reads[1]} \\
2> ${prefix}.dragmap.log \\
| samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
"""
dragen-os \\
-r $hashmap \\
$args \\
--num-threads $task.cpus \\
$reads_command \\
2> ${prefix}.dragmap.log \\
| samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dragmap: \$(echo \$(dragen-os --version 2>&1))
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dragmap: \$(echo \$(dragen-os --version 2>&1))
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
END_VERSIONS
"""
}

View file

@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK {
script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def input_list = bam.collect{"--INPUT $it"}.join(' ')
def input_list = bam.collect{"--input $it"}.join(' ')
def avail_mem = 3
if (!task.memory) {

View file

@ -9,11 +9,11 @@ process KAIJU_KAIJU {
input:
tuple val(meta), path(reads)
tuple path(db), path(dbnodes)
path(db)
output:
tuple val(meta), path('*.tsv'), emit: results
path "versions.yml" , emit: versions
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@ -23,11 +23,13 @@ process KAIJU_KAIJU {
def prefix = task.ext.prefix ?: "${meta.id}"
def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
"""
dbnodes=`find -L ${db} -name "*nodes.dmp"`
dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
kaiju \\
$args \\
-z $task.cpus \\
-t ${dbnodes} \\
-f ${db} \\
-t \$dbnodes \\
-f \$dbname \\
-o ${prefix}.tsv \\
$input

View file

@ -50,3 +50,4 @@ output:
authors:
- "@talnor"
- "@sofstam"
- "@jfy133"

View file

@ -9,6 +9,7 @@ process STRANGER {
input:
tuple val(meta), path(vcf)
path variant_catalog
output:
tuple val(meta), path("*.gz"), emit: vcf
@ -20,10 +21,12 @@ process STRANGER {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def options_variant_catalog = variant_catalog ? "--repeats-file $variant_catalog" : ""
"""
stranger \\
$args \\
$vcf | gzip --no-name > ${prefix}.vcf.gz
$vcf \\
$options_variant_catalog | gzip --no-name > ${prefix}.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -24,6 +24,10 @@ input:
type: file
description: VCF with repeat expansions
pattern: "*.{vcf.gz,vcf}"
- variant_catalog:
type: file
description: json file with repeat expansion sites to genotype
pattern: "*.{json}"
output:
- meta:

View file

@ -28,9 +28,8 @@ params {
kraken2_bracken = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken"
kraken2_bracken_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken.tar.gz"
kaiju_fmi = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/proteins.fmi"
kaiju_nodes = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/nodes.dmp"
kaiju_names = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/names.dmp"
kaiju = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju"
kaiju_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju.tar.gz"
ncbi_taxmap_zip = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/ncbi_taxmap.zip"
taxon_list_txt = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/taxon_list.txt"

View file

@ -25,7 +25,6 @@ workflow test_centrifuge_centrifuge_paired_end {
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
]
db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ]
//db_name = "minigut_cf"
save_unaligned = true
save_aligned = false
sam_format = false

View file

@ -0,0 +1,32 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { UNTAR } from '../../../../modules/untar/main.nf'
include { CENTRIFUGE_CENTRIFUGE } from '../../../../modules/centrifuge/centrifuge/main.nf'
include { CENTRIFUGE_KREPORT } from '../../../../modules/centrifuge/kreport/main.nf'
// Single-end test: untar the centrifuge database, classify one FASTQ file with
// CENTRIFUGE_CENTRIFUGE, then summarise the classification with CENTRIFUGE_KREPORT.
workflow test_centrifuge_kreport_single_end {

    reads_tuple = [
        [ id:'test', single_end:true ], // meta map
        [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
    ]

    // [ meta, archive ] pair handed to UNTAR; the meta slot is left empty
    db_archive = [
        [],
        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true)
    ]

    UNTAR ( db_archive )

    // Keep only the second element of each UNTAR output tuple
    ch_db_dir = UNTAR.out.untar.map { untarred -> untarred[1] }

    CENTRIFUGE_CENTRIFUGE ( reads_tuple, ch_db_dir, false, false, false )
    CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db_dir )
}
// Paired-end test: untar the centrifuge database, classify a pair of FASTQ files
// with CENTRIFUGE_CENTRIFUGE, then summarise the result with CENTRIFUGE_KREPORT.
workflow test_centrifuge_kreport_paired_end {

    reads_tuple = [
        [ id:'test', single_end:false ], // meta map
        [
            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
        ]
    ]

    // [ meta, archive ] pair handed to UNTAR; the meta slot is left empty
    db_archive = [
        [],
        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true)
    ]

    UNTAR ( db_archive )

    // Keep only the second element of each UNTAR output tuple
    ch_db_dir = UNTAR.out.untar.map { untarred -> untarred[1] }

    CENTRIFUGE_CENTRIFUGE ( reads_tuple, ch_db_dir, false, false, false )
    CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db_dir )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish results under <outdir>/<tool>, where <tool> is the last
    // ':'-separated part of the process name, cut at its first '_' and lower-cased.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,21 @@
# Single-end entry point: checks the report file (checksum and content) and
# that a versions file is produced.
- name: centrifuge kreport test_centrifuge_kreport_single_end
  command: nextflow run tests/modules/centrifuge/kreport -entry test_centrifuge_kreport_single_end -c tests/config/nextflow.config
  tags: [centrifuge, centrifuge/kreport]
  files:
    - path: output/centrifuge/test.txt
      md5sum: af1a51fe57eb6d428350ff4a4bf759d4
      contains:
        - "unclassified"
    - path: output/centrifuge/versions.yml
# Paired-end entry point: checks the report file (checksum and content) and
# that a versions file is produced.
- name: centrifuge kreport test_centrifuge_kreport_paired_end
  command: nextflow run tests/modules/centrifuge/kreport -entry test_centrifuge_kreport_paired_end -c tests/config/nextflow.config
  tags: [centrifuge, centrifuge/kreport]
  files:
    - path: output/centrifuge/test.txt
      md5sum: af1a51fe57eb6d428350ff4a4bf759d4
      contains:
        - "unclassified"
    - path: output/centrifuge/versions.yml

View file

@ -2,6 +2,7 @@
nextflow.enable.dsl = 2
include { UNTAR } from '../../../../modules/untar/main.nf'
include { KAIJU_KAIJU } from '../../../../modules/kaiju/kaiju/main.nf'
workflow test_kaiju_kaiju_single_end {
@ -10,12 +11,10 @@ workflow test_kaiju_kaiju_single_end {
[ id:'test', single_end:true ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
]
db = [
file(params.test_data['sarscov2']['genome']['kaiju_fmi'], checkIfExists: true), // database
file(params.test_data['sarscov2']['genome']['kaiju_nodes'], checkIfExists: true) // taxon nodes
]
db = [ [], file(params.test_data['sarscov2']['genome']['kaiju_tar_gz'], checkIfExists: true) ]
KAIJU_KAIJU ( input, db )
UNTAR ( db )
KAIJU_KAIJU ( input, UNTAR.out.untar.map{ it[1] } )
}
workflow test_kaiju_kaiju_paired_end {
@ -25,10 +24,9 @@ workflow test_kaiju_kaiju_paired_end {
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
]
db = [
file(params.test_data['sarscov2']['genome']['kaiju_fmi'], checkIfExists: true), // database
file(params.test_data['sarscov2']['genome']['kaiju_nodes'], checkIfExists: true) // taxon nodes
]
db = [ [], file(params.test_data['sarscov2']['genome']['kaiju_tar_gz'], checkIfExists: true) ]
UNTAR ( db )
KAIJU_KAIJU ( input, UNTAR.out.untar.map{ it[1] } )
KAIJU_KAIJU ( input, db )
}

View file

@ -5,15 +5,21 @@ nextflow.enable.dsl = 2
include { EXPANSIONHUNTER } from '../../../modules/expansionhunter/main.nf'
include { STRANGER } from '../../../modules/stranger/main.nf'
input = [ [ id:'test', gender:'male' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
variant_catalog = file(params.test_data['homo_sapiens']['genome']['repeat_expansions'], checkIfExists: true)
workflow test_stranger {
input = [ [ id:'test', gender:'male' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
variant_catalog = file(params.test_data['homo_sapiens']['genome']['repeat_expansions'], checkIfExists: true)
EXPANSIONHUNTER ( input, fasta, variant_catalog )
STRANGER ( EXPANSIONHUNTER.out.vcf )
STRANGER ( EXPANSIONHUNTER.out.vcf, variant_catalog )
}
workflow test_stranger_without_optional_variant_catalog {
EXPANSIONHUNTER ( input, fasta, variant_catalog )
STRANGER ( EXPANSIONHUNTER.out.vcf, [] )
}

View file

@ -8,6 +8,20 @@
- path: output/expansionhunter/versions.yml
md5sum: f3962a6eecfddf9682414c0f605a885a
- path: output/stranger/test.vcf.gz
md5sum: bbe15159195681d5c18596d3ad85c78f
md5sum: 68b0ca1319851134ffa8793a4704dc11
- path: output/stranger/versions.yml
md5sum: 5ec35fd835fb1be50bc3e7c004310fc0
- name: stranger test_stranger_without_optional_variant_catalog
command: nextflow run tests/modules/stranger -entry test_stranger_without_optional_variant_catalog -c tests/config/nextflow.config
tags:
- stranger
files:
- path: output/expansionhunter/test.vcf
md5sum: cfd4a1d35c0e469b99eb6aaa6d22de76
- path: output/expansionhunter/versions.yml
md5sum: c95af9e6d8cd9bd2ce1090ca4e7a6020
- path: output/stranger/test.vcf.gz
md5sum: bbe15159195681d5c18596d3ad85c78f
- path: output/stranger/versions.yml
md5sum: 8558542a007e90ea5dcdceed3f12585d