Add bases2fastq (#2054)

* feat(bases2fastq): nf-core modules create

* test(bases2fastq): Update tests

* docs(bases2fastq): Update meta.yml

* test: Add untar rundir
This commit is contained in:
Edmund Miller 2022-09-15 13:50:33 +00:00 committed by GitHub
parent 513b5fcab9
commit 4963ca0570
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 167 additions and 0 deletions

View file

@ -0,0 +1,45 @@
// Runs bases2fastq on a run directory and collects the FASTQ files, per-sample
// JSON, HTML report, run-level JSON/CSV outputs and the tool version.
//
// Input : tuple [ meta, run_manifest, run_dir ]. When run_manifest is empty
//         (Groovy-falsy) the `-r` option is omitted from the command line.
// Output: everything is read back from the `output` directory that is passed
//         to bases2fastq as its last positional argument.
process BASES2FASTQ {
    tag "$meta.id"
    label 'process_high'

    // No conda package is wired up for this module, so fail fast with a clear message.
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using bases2fastq. Please use docker or singularity containers."
    }
    container "elembio/bases2fastq:1.1.0"

    input:
    tuple val(meta), path(run_manifest), path(run_dir)

    output:
    tuple val(meta), path('output/Samples/*/*_R*.fastq.gz'), emit: sample_fastq
    tuple val(meta), path('output/Samples/*/*.json')       , emit: sample_json
    tuple val(meta), path('output/*.html')                 , emit: qc_report
    tuple val(meta), path('output/RunStats.json')          , emit: run_stats
    tuple val(meta), path('output/RunManifest.json')       , emit: generated_run_manifest
    tuple val(meta), path('output/Metrics.csv')            , emit: metrics
    tuple val(meta), path('output/UnassignedSequences.csv'), emit: unassigned
    path "versions.yml"                                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Only pass -r when a run manifest was actually supplied.
    def runManifest = run_manifest ? "-r ${run_manifest}" : ""
    """
    bases2fastq \\
        -p $task.cpus \\
        $runManifest \\
        $args \\
        $run_dir \\
        output

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bases2fastq: \$(bases2fastq --version | sed -e "s/bases2fastq version //g")
    END_VERSIONS
    """
}

View file

@ -0,0 +1,68 @@
# Module metadata for bases2fastq: describes the tool and the channels
# consumed and emitted by the BASES2FASTQ process.
name: "bases2fastq"
description: Demultiplex Element Biosciences bases files
keywords:
  - demultiplex
  - element
  - fastq
tools:
  - "bases2fastq":
      description: "Demultiplexes sequencing data and converts base calls into FASTQ files for secondary analysis"
      homepage: "https://go.elementbiosciences.com/bases2fastq-download"
      documentation: "https://www.elementbiosciences.com/resources/user-guides/workflow/bases2fastq"
      licence: "http://go.elembio.link/eula"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - run_manifest:
      type: file
      description: RunManifest file
      pattern: "*.csv"
  - run_dir:
      type: directory
      description: "Input run directory, optionally containing a RunManifest.json if run_manifest is not supplied"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - sample_fastq:
      type: file
      description: Demultiplexed sample FASTQ files
      pattern: "output/Samples/*/*_R*.fastq.gz"
  - sample_json:
      type: file
      description: Demultiplexed sample stats
      pattern: "output/Samples/*/*.json"
  - qc_report:
      type: file
      description: QC HTML report
      pattern: "output/*.html"
  # Matches the `run_stats` emit of the process (output/RunStats.json).
  - run_stats:
      type: file
      description: Run statistics JSON
      pattern: "output/RunStats.json"
  - generated_run_manifest:
      type: file
      description: Updated Run Manifest JSON from the run_manifest csv
      pattern: "output/RunManifest.json"
  - metrics:
      type: file
      description: Sample metrics
      pattern: "output/Metrics.csv"
  - unassigned:
      type: file
      description: Unassigned Sequences
      pattern: "output/UnassignedSequences.csv"

authors:
  - "@Emiller88"

View file

@ -134,6 +134,10 @@ bandage/image:
- modules/bandage/image/**
- tests/modules/bandage/image/**
# bases2fastq: glob patterns for the module sources and for its tests.
bases2fastq:
- modules/bases2fastq/**
- tests/modules/bases2fastq/**
bbmap/align:
- modules/bbmap/align/**
- tests/modules/bbmap/align/**

View file

@ -0,0 +1,23 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BASES2FASTQ } from '../../../modules/bases2fastq/main.nf'
include { UNTAR } from '../../../modules/untar/main.nf'
// Test workflow: pairs the simulated RunManifest.csv with the untarred
// simulated run directory and feeds the combined tuple to BASES2FASTQ.
workflow test_bases2fastq {

    // Run manifest, keyed by the meta map.
    ch_manifest = Channel.value([
        [ id:'sim-data' ], // meta map
        file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/sim-data/RunManifest.csv", checkIfExists: true),
    ])

    // Tarball of the run directory, carrying an equal meta map.
    rundir_tarball = [
        [ id:'sim-data' ],
        file("https://github.com/nf-core/test-datasets/raw/demultiplex/testdata/sim-data/sim-data.tar.gz", checkIfExists: true)
    ]
    UNTAR ( rundir_tarball )

    // join matches on the first tuple element (the meta map), giving
    // [ meta, run_manifest, run_dir ] as BASES2FASTQ expects.
    ch_input = ch_manifest.join( UNTAR.out.untar )

    BASES2FASTQ ( ch_input )
}

View file

@ -0,0 +1,9 @@
// Test-only process configuration: controls where process outputs are published.
process {
// Default publish directory: take the last ':'-separated segment of the
// process name, keep the part before the first '_', and lowercase it.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// Do not publish the UNTAR helper's outputs.
withName: UNTAR {
publishDir = [ enabled: false ]
}
}

View file

@ -0,0 +1,18 @@
# Test case for the bases2fastq module: runs the test_bases2fastq workflow
# entry and then checks the files listed under `files`.
- name: "bases2fastq"
command: nextflow run ./tests/modules/bases2fastq -entry test_bases2fastq -c ./tests/config/nextflow.config -c ./tests/modules/bases2fastq/nextflow.config
tags:
- "bases2fastq"
files:
# Entries without an md5sum carry no checksum to compare; presumably their
# content is not stable between runs - TODO confirm.
- path: output/bases2fastq/output/Bases2Fastq-Sim_QC.html
- path: output/bases2fastq/output/Metrics.csv
md5sum: 0ec6da2b82e191098283474356024abf
- path: output/bases2fastq/output/RunManifest.json
md5sum: a07dce8ee25c2a6f9355b677c26b53e2
- path: output/bases2fastq/output/RunStats.json
- path: output/bases2fastq/output/UnassignedSequences.csv
md5sum: 11c1693830ce941b8cfb8d2431a59097
- path: output/bases2fastq/output/Samples/DefaultSample/DefaultSample_R1.fastq.gz
md5sum: 218abc70f61e8e8199a68f83ae836184
- path: output/bases2fastq/output/Samples/DefaultSample/DefaultSample_R2.fastq.gz
md5sum: b95109bfb204daa150b61239d3368ee2
- path: output/bases2fastq/output/Samples/DefaultSample/DefaultSample_stats.json