From 3e411cc6888cd926f8f2fe6c3c46128d284c5e04 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Thu, 19 Oct 2023 11:38:43 -0500 Subject: [PATCH] refactor: Switch to SRA direct download for samples --- assets/schema_input.json | 8 ++++---- main.nf | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 80cf68b..fbcd887 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -12,12 +12,12 @@ "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" }, - "fastq_1": { + "sra": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM])|(syn))(\\d+)$", + "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier" } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "sra"] } } diff --git a/main.nf b/main.nf index 17096eb..1510ff6 100755 --- a/main.nf +++ b/main.nf @@ -1,9 +1,10 @@ #!/usr/bin/env nextflow -include { validateParameters } from 'plugin/nf-validation' +include { validateParameters; fromSamplesheet } from 'plugin/nf-validation' include { CLIQUESNV } from './modules/cliquesnv' include { EFETCH } from './modules/efetch' +include { FASTERQ_DUMP } from './modules/fasterq_dump' include { HAPLINK_HAPLOTYPES as HAPLINK_ML_HAPLOTYPES } from './modules/haplink/haplotypes' include { HAPLINK_HAPLOTYPES as HAPLINK_RAW_HAPLOTYPES } from './modules/haplink/haplotypes' include { HAPLINK_SEQUENCES } from './modules/haplink/sequences' @@ -20,8 +21,12 @@ workflow { validateParameters() Channel - .fromPath("*.fastq.gz") - .map { file -> tuple(file.simpleName, file) } + .fromSamplesheet("input") + .set { ch_sras } + + FASTERQ_DUMP( ch_sras ) + FASTERQ_DUMP + .out .set { ch_input } EFETCH("${params.reference}")