From af01fccba1be7ffa4bc1d84d8e31e4f0a3f6c86f Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 7 Jul 2020 14:31:11 +0200 Subject: [PATCH 01/14] Add a HOPS profile for nf-core/eager @ SHH --- conf/pipeline/eager/shh.config | 42 ++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 5d8b576..8001823 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -5,10 +5,6 @@ params { config_profile_contact = 'James Fellows Yates (@jfy133)' config_profile_description = 'nf-core/eager SHH profile provided by nf-core/configs' igenomes_base = "/projects1/public_data/igenomes/" - - // default BWA - bwaalnn = 0.04 - bwaalnl = 32 } // Specific nf-core/eager process configuration @@ -26,6 +22,44 @@ process { } profiles { + hops { + pathogen_screening { + fasta = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' + bwa_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' + fasta_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta.fai' + seq_dict = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.dict' + bwaalnn = 0.04 + bwaalnl = 32 + run_bam_filtering = true + bam_discard_unmapped = true + bam_unmapped_type = 'fastq' + run_metagenomic_screening = true + metagenomic_tool = 'malt' + metagenomic_min_support_reads = 1 + database = '/projects1/malt/databases/indexed/index040/full-bac-full-vir-etal-nov_2017' + percent_identity = 90 + malt_mode = 'BlastN' + malt_alignment_mode = 'SemiGlobal' + malt_top_percent = 1 + malt_min_support_mode = 'percent' + malt_min_support_percent = 0.01 + malt_max_queries = 100 + malt_memory_mode = 'load' + run_maltextract = true + maltextract_taxon_list = '/projects1/users/key/anc5h/soi.backup/List_of_pathogens_KB_fmk12_wViruses1.txt' + maltextract_ncbifiles = '/projects1/clusterhomes/huebler/RMASifter/RMA_Extractor_Resources/' + maltextract_filter = 
'def_anc' + maltextract_toppercent = 0.01 + maltextract_destackingoff = false + maltextract_downsamplingoff = false + maltextract_duplicateremovaloff = false + maltextract_matches = false + maltextract_megansummary = false + maltextract_percentidentity = 90.0 + maltextract_topalignment = false + maltextract_singlestranded = false + } + } pathogen_loose { params { config_profile_description = 'Pathogen (loose) MPI-SHH profile, provided by nf-core/configs.' From b2e043ab59e2cf693e6d7e08df49cfbbb93bd960 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 7 Jul 2020 14:35:42 +0200 Subject: [PATCH 02/14] Add documentation for HOPS profile for eager @ SHH --- docs/pipeline/eager/shh.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/pipeline/eager/shh.md b/docs/pipeline/eager/shh.md index 8377ded..b19d935 100644 --- a/docs/pipeline/eager/shh.md +++ b/docs/pipeline/eager/shh.md @@ -16,3 +16,4 @@ Specific configurations for shh has been made for eager. * If running with the MALT module turned on, the MALT process by default will be sent to the long queue with a resource requirement minimum of 725GB and 64 cores. If this fails, the process will be tried once more only and sent to the supercruncher queue. The module will not retry after this, and pipeline will fail. Note, this will only work on SDAG. * Provides additional group specific profiles, which adapt the `bwa aln` mapping parameters to each context: `pathogens_loose` (`-l 0.01 -n 16`), `pathogens_strict` (`-l 32, -n 0.1`) and `human` (`-l 16500, -n 0.01`). +* Provides a `hops` profile with default paths and parameters for the Pathogen group. From 737d826932ab17efa822af6462c15dec6fc8e05a Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 8 Jul 2020 08:36:04 +0200 Subject: [PATCH 03/14] Update shh.config --- conf/pipeline/eager/shh.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 8001823..dabff38 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -23,7 +23,7 @@ process { profiles { hops { - pathogen_screening { + params { fasta = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' bwa_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' fasta_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta.fai' From aac3478e86f1afd19ce2e99fc34e1c19f2a162c4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 8 Jul 2020 09:12:11 +0200 Subject: [PATCH 04/14] Add better task time values to account for large data --- conf/pipeline/eager/shh.config | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index dabff38..718242c 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -19,6 +19,49 @@ process { time = 1440.h queue = { task.memory > 756.GB ? 'supercruncher' : 'long' } } + + withLabel:'sc_tiny'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + } + + withLabel:'sc_small'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + } + + withLabel:'sc_medium'{ + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 
24.h : 1.h, 'time' ) } + } + + withLabel:'mc_small'{ + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + } + + withLabel:'mc_medium' { + cpus = { check_max( 4, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + } + + withLabel:'mc_large'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + } + + withLabel:'mc_huge'{ + cpus = { check_max( 32, 'cpus' ) } + memory = { check_max( 256.GB * task.attempt, 'memory' ) } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + } + } profiles { From 31edb4b5d7b0e5d9f0017251ce992ac982d66aaf Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 8 Jul 2020 09:20:00 +0200 Subject: [PATCH 05/14] Update shh.config --- conf/pipeline/eager/shh.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 718242c..56f5cb9 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -23,43 +23,43 @@ process { withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 
24.h : 1.h } } withLabel:'sc_small'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } } withLabel:'sc_medium'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } } withLabel:'mc_small'{ cpus = { check_max( 2, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } } withLabel:'mc_medium' { cpus = { check_max( 4, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } } withLabel:'mc_large'{ cpus = { check_max( 8, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } } withLabel:'mc_huge'{ cpus = { check_max( 32, 'cpus' ) } memory = { check_max( 256.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 
48.h : task.attempt == 1 ? 24.h : 1.h } } } From b360ce26bcd5aa69f99722573585655daa88525f Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 8 Jul 2020 09:24:06 +0200 Subject: [PATCH 06/14] Update shh.config --- conf/pipeline/eager/shh.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 56f5cb9..935d2a5 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -23,43 +23,43 @@ process { withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 2.h } } withLabel:'sc_small'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 2.h } } withLabel:'sc_medium'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'mc_small'{ cpus = { check_max( 2, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'mc_medium' { cpus = { check_max( 4, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } } withLabel:'mc_large'{ cpus = { check_max( 8, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'mc_huge'{ cpus = { check_max( 32, 'cpus' ) } memory = { check_max( 256.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } } From c6905b609e0422a36501e38b788a5003ded69526 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 8 Jul 2020 09:25:21 +0200 Subject: [PATCH 07/14] Update shh.config --- conf/pipeline/eager/shh.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 935d2a5..3d0f195 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -23,13 +23,13 @@ process { withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 2.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'sc_small'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 2.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } } withLabel:'sc_medium'{ From 563c27e1bb053c6658a76c50ab0655c157210fb9 Mon Sep 17 00:00:00 2001 From: jfy133 Date: Wed, 8 Jul 2020 09:36:28 +0200 Subject: [PATCH 08/14] Try again to get better time resource submission --- conf/pipeline/eager/shh.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 3d0f195..718242c 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -23,43 +23,43 @@ process { withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } } withLabel:'sc_small'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } } withLabel:'sc_medium'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } } withLabel:'mc_small'{ cpus = { check_max( 2, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } } withLabel:'mc_medium' { cpus = { check_max( 4, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } } withLabel:'mc_large'{ cpus = { check_max( 8, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } } withLabel:'mc_huge'{ cpus = { check_max( 32, 'cpus' ) } memory = { check_max( 256.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } + time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } } } From 926c14f7ee0368869214671d595179c9777635d1 Mon Sep 17 00:00:00 2001 From: jfy133 Date: Wed, 8 Jul 2020 09:36:56 +0200 Subject: [PATCH 09/14] Cleverer process time submission --- conf/pipeline/eager/shh.config | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 718242c..560dee5 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -10,11 +10,11 @@ params { // Specific nf-core/eager process configuration process { - maxRetries = 5 + maxRetries = 2 withName: malt { maxRetries = 1 - memory = { task.attempt > 1 ? 1900.GB : 725.GB } + memory = { task.attempt > 1 ? 1900.GB : 725.GB } cpus = { task.attempt > 1 ? 112 : 64 } time = 1440.h queue = { task.memory > 756.GB ? 'supercruncher' : 'long' } @@ -23,43 +23,43 @@ process { withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 1.h } } withLabel:'sc_small'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } } withLabel:'sc_medium'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } } withLabel:'mc_small'{ cpus = { check_max( 2, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } } withLabel:'mc_medium' { cpus = { check_max( 4, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } } withLabel:'mc_large'{ cpus = { check_max( 8, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } } withLabel:'mc_huge'{ cpus = { check_max( 32, 'cpus' ) } memory = { check_max( 256.GB * task.attempt, 'memory' ) } - time = { check_max( task.attempt == 3 ? 120.h : task.attempt == 2 ? 48.h : task.attempt == 1 ? 24.h : 1.h, 'time' ) } + time = { task.attempt == 3 ? 1449.h : task.attempt == 2 ? 
48.h : 1.h } } } @@ -99,7 +99,7 @@ profiles { maltextract_matches = false maltextract_megansummary = false maltextract_percentidentity = 90.0 - maltextract_topalignment = false + maltextract_topalignment = false maltextract_singlestranded = false } } From 15dad7e2e716869614ce1e2040f3d4e7b633f5c6 Mon Sep 17 00:00:00 2001 From: jfy133 Date: Wed, 8 Jul 2020 09:47:03 +0200 Subject: [PATCH 10/14] Add caveats for EAGER @ SHH profiles --- conf/pipeline/eager/shh.config | 1 + docs/pipeline/eager/shh.md | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 560dee5..9414da2 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -65,6 +65,7 @@ process { } profiles { + // IMPORTANT this profile is not reproducible due to hardcoded paths. For initial/automated screening ONLY. hops { params { fasta = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' diff --git a/docs/pipeline/eager/shh.md b/docs/pipeline/eager/shh.md index b19d935..f83c37d 100644 --- a/docs/pipeline/eager/shh.md +++ b/docs/pipeline/eager/shh.md @@ -14,6 +14,19 @@ Example: `nextflow run nf-core/eager -profile shh` Specific configurations for shh has been made for eager. +### General profiles + * If running with the MALT module turned on, the MALT process by default will be sent to the long queue with a resource requirement minimum of 725GB and 64 cores. If this fails, the process will be tried once more only and sent to the supercruncher queue. The module will not retry after this, and pipeline will fail. Note, this will only work on SDAG. -* Provides additional group specific profiles, which adapt the `bwa aln` mapping parameters to each context: `pathogens_loose` (`-l 0.01 -n 16`), `pathogens_strict` (`-l 32, -n 0.1`) and `human` (`-l 16500, -n 0.01`). -* Provides a `hops` profile with default paths and parameters for the Pathogen group. 
+ +### Contextual profiles + +#### Human Pop-Gen + +* `human`: optimised for mapping of human aDNA reads (i.e. bwa aln defaults as `-l 16500, -n 0.01`) + +#### Pathogen + +* `pathogen_loose`: optimised for loose mapping of pathogen aDNA reads (i.e. bwa aln defaults as `-l 16 -n 0.01`) +* `pathogen_strict`: optimised for strict mapping of pathogen aDNA reads (i.e. bwa aln defaults as `-l 32, -n 0.1`) +* `hops`: profile with default paths and parameters for automated/initial pathogen screening. + * :warning: This is NOT a reproducible profile as it contains hardcoded paths. This should only be used for initial/automated screening where you wish to quickly check for any possible positives; after which you should re-do screening in a reproducible manner for publication! From c57a404f187f6a51e508f68a302ffecb12d48034 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 8 Jul 2020 10:45:43 +0200 Subject: [PATCH 11/14] Add desc for hops --- conf/pipeline/eager/shh.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index 9414da2..ce5e6c4 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -68,6 +68,7 @@ profiles { // IMPORTANT this profile is not reproducible due to hardcoded paths. For initial/automated screening ONLY. hops { params { + config_profile_description = 'Rough HOPS screening MPI-SHH profile, provided by nf-core/configs.' fasta = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' bwa_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' fasta_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta.fai' seq_dict = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.dict' From 1af9ce293c0eb38255391e804585f025d1834b04 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 8 Jul 2020 15:22:33 +0200 Subject: [PATCH 12/14] Update hops profile based on requests from pathogen group --- conf/pipeline/eager/shh.config | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index ce5e6c4..ec7bff1 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -73,8 +73,8 @@ profiles { bwa_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta' fasta_index = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.fasta.fai' seq_dict = '/projects1/Reference_Genomes/Human/HG19/hg19_complete.dict' - bwaalnn = 0.04 - bwaalnl = 32 + bwaalnn = 0.01 + bwaalnl = 16 run_bam_filtering = true bam_discard_unmapped = true bam_unmapped_type = 'fastq' @@ -82,11 +82,11 @@ profiles { metagenomic_tool = 'malt' metagenomic_min_support_reads = 1 database = '/projects1/malt/databases/indexed/index040/full-bac-full-vir-etal-nov_2017' - percent_identity = 90 + percent_identity = 85 malt_mode = 'BlastN' malt_alignment_mode = 'SemiGlobal' malt_top_percent = 1 - malt_min_support_mode = 'percent' + malt_min_support_mode = 'reads' malt_min_support_percent = 0.01 malt_max_queries = 100 malt_memory_mode = 'load' @@ -99,10 +99,9 @@ profiles { maltextract_downsamplingoff = false maltextract_duplicateremovaloff = false maltextract_matches = false - maltextract_megansummary = false - maltextract_percentidentity = 90.0 + maltextract_megansummary = true + maltextract_percentidentity = 85.0 maltextract_topalignment = false - maltextract_singlestranded = false } } pathogen_loose { From 8f6409d1440b7990102d7e5503e1c1c6d38c6c1d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 8 Jul 2020 15:26:19 +0200 Subject: [PATCH 13/14] Remove min support percent value to avoid confusion --- conf/pipeline/eager/shh.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index ec7bff1..b3b6c3e 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -87,7 +87,6 @@ profiles { malt_alignment_mode = 'SemiGlobal' malt_top_percent = 1 malt_min_support_mode = 'reads' - malt_min_support_percent = 0.01 malt_max_queries = 100 malt_memory_mode = 'load' run_maltextract = true From 84e03b8264f1985b01a9cce6a7f2fe899376a4f7 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sat, 11 Jul 2020 18:31:04 +0200 Subject: [PATCH 14/14] Re-bump default walltimes after debugging/testing --- conf/pipeline/eager/shh.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/pipeline/eager/shh.config b/conf/pipeline/eager/shh.config index b3b6c3e..c48db8b 100644 --- a/conf/pipeline/eager/shh.config +++ b/conf/pipeline/eager/shh.config @@ -23,43 +23,43 @@ process { withLabel:'sc_tiny'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'sc_small'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'sc_medium'{ cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 
48.h : 2.h } } withLabel:'mc_small'{ cpus = { check_max( 2, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'mc_medium' { cpus = { check_max( 4, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'mc_large'{ cpus = { check_max( 8, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } withLabel:'mc_huge'{ cpus = { check_max( 32, 'cpus' ) } memory = { check_max( 256.GB * task.attempt, 'memory' ) } - time = { task.attempt == 3 ? 1449.h : task.attempt == 2 ? 48.h : 1.h } + time = { task.attempt == 3 ? 1440.h : task.attempt == 2 ? 48.h : 2.h } } }