1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-11 01:43:09 +00:00

Filter long reads for MALT, bump cpus for FastQC for minigut to pass

This commit is contained in:
James Fellows Yates 2022-03-28 18:20:10 +02:00
parent eada201eb2
commit 94e5cfef4a
4 changed files with 22 additions and 3 deletions

View file

@ -27,7 +27,7 @@ process {
// TODO nf-core: Customise requirements for specific processes. // TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_low { withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) } cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) } time = { check_max( 4.h * task.attempt, 'time' ) }
} }

View file

@ -15,7 +15,7 @@ params {
config_profile_description = 'Minimal test dataset to check pipeline function' config_profile_description = 'Minimal test dataset to check pipeline function'
// Limit resources so that this can run on GitHub Actions // Limit resources so that this can run on GitHub Actions
max_cpus = 2 max_cpus = 8
max_memory = '6.GB' max_memory = '6.GB'
max_time = '6.h' max_time = '6.h'
@ -29,3 +29,16 @@ params {
shortread_clipmerge = true shortread_clipmerge = true
} }
// Per-process resource overrides that sit alongside the test-profile params above.
process {
// FASTQC: request 8 CPUs, 6 GB of memory and 6 h of wall time, each multiplied
// by task.attempt and passed through check_max with the matching limit name.
// NOTE(review): check_max is defined outside this view; presumably it caps each
// request at the corresponding max_cpus / max_memory / max_time value — confirm.
withName: FASTQC {
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 6.h * task.attempt, 'time' ) }
}
// FASTQC_PROCESSED: identical resource requests to the FASTQC selector.
withName: FASTQC_PROCESSED {
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 6.h * task.attempt, 'time' ) }
}
}

View file

@ -14,7 +14,7 @@ workflow DB_CHECK {
// TODO: make database sheet check // TODO: make database sheet check
// Checks: // Checks:
// 1) no duplicates, // 1) no duplicates,
// 2) dbs with no special arguments does not have quotes, e.g. just `,,` and NOT `,"",` // 2) args do not have quotes, e.g. just `,,` and NOT `,"",`
parsed_samplesheet = DATABASE_CHECK ( dbsheet ) parsed_samplesheet = DATABASE_CHECK ( dbsheet )
.csv .csv
.splitCsv ( header:true, sep:',' ) .splitCsv ( header:true, sep:',' )

View file

@ -90,9 +90,11 @@ workflow TAXPROFILER {
MODULE: Run FastQC MODULE: Run FastQC
*/ */
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq") ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
FASTQC ( FASTQC (
ch_input_for_fastqc ch_input_for_fastqc
) )
ch_versions = ch_versions.mix(FASTQC.out.versions.first()) ch_versions = ch_versions.mix(FASTQC.out.versions.first())
CUSTOM_DUMPSOFTWAREVERSIONS ( CUSTOM_DUMPSOFTWAREVERSIONS (
@ -137,7 +139,11 @@ workflow TAXPROFILER {
// We groupTuple to have all samples in one channel for MALT as database // We groupTuple to have all samples in one channel for MALT as database
// loading takes a long time, so we only want to run it once per database // loading takes a long time, so we only want to run it once per database
// TODO document somewhere we only accept illumina short reads for MALT?
ch_input_for_malt = ch_input_for_profiling.malt ch_input_for_malt = ch_input_for_profiling.malt
.dump(tag: "input_to_malt_prefilter")
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
.dump(tag: "input_to_malt_postfilter")
.map { .map {
it -> it ->
def temp_meta = [ id: it[2]['db_name']] + it[2] def temp_meta = [ id: it[2]['db_name']] + it[2]