add FCS adaptor (#2033)

* add FCS adaptor

* run prettier

* fix EClint

* add keywords to meta

* fix docker

Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
This commit is contained in:
Daniel Straub 2022-09-30 22:39:55 +02:00 committed by GitHub
parent 4eed099a12
commit c9889866a9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 150 additions and 0 deletions

View file

@ -0,0 +1,49 @@
// Runs NCBI's FCS-adaptor screen (av_screen_x) on one assembly and collects the
// cleaned sequences (gzip-compressed) together with the tool's report, log,
// pipeline arguments and skipped-trims files, each renamed with the sample prefix.
//
// Input : tuple of the sample meta map and the assembly file.
// Output: one channel per result file (all keyed by meta) plus versions.yml.
process FCS_FCSADAPTOR {
    tag "$meta.id"
    label 'process_low'

    // Abort when Conda is enabled; the message below tells users to use
    // docker or singularity containers instead.
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using the FCS tool. Please use docker or singularity containers."
    }
    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    // Singularity uses the .sif image unless task.ext.singularity_pull_docker_container is set;
    // every other case uses the Docker image.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.2.3/fcs-adaptor.0.2.3.sif':
        'ncbi/fcs-adaptor:0.2.3' }"

    input:
    tuple val(meta), path(assembly)

    output:
    tuple val(meta), path("*.cleaned_sequences.fa.gz"), emit: cleaned_assembly
    tuple val(meta), path("*.fcs_adaptor_report.txt") , emit: adaptor_report
    tuple val(meta), path("*.fcs_adaptor.log")        , emit: log
    tuple val(meta), path("*.pipeline_args.yaml")     , emit: pipeline_args
    tuple val(meta), path("*.skipped_trims.jsonl")    , emit: skipped_trims
    path "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Defaults to the prokaryote screen when no extra arguments are configured.
    def args = task.ext.args ?: '--prok' // --prok || --euk
    def prefix = task.ext.prefix ?: "${meta.id}"
    def FCSADAPTOR_VERSION = '0.2.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    """
    /app/fcs/bin/av_screen_x \\
        -o output/ \\
        $args \\
        $assembly

    # compress and/or rename files with prefix
    gzip -cf output/cleaned_sequences/* > "${prefix}.cleaned_sequences.fa.gz"
    cp "output/fcs_adaptor_report.txt" "${prefix}.fcs_adaptor_report.txt"
    cp "output/fcs_adaptor.log" "${prefix}.fcs_adaptor.log"
    cp "output/pipeline_args.yaml" "${prefix}.pipeline_args.yaml"
    cp "output/skipped_trims.jsonl" "${prefix}.skipped_trims.jsonl"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        FCS-adaptor: $FCSADAPTOR_VERSION
    END_VERSIONS
    """
}

View file

@ -0,0 +1,62 @@
# Module metadata for fcs/fcsadaptor: describes the wrapped tool and documents
# every input and output channel of the FCS_FCSADAPTOR process.
name: "fcs_fcsadaptor"
description: Run NCBI's FCS adaptor on assembled genomes
keywords:
  - assembly
  - genomics
  - quality control
  - contamination
  - NCBI
tools:
  - "fcs":
      description: |
        The Foreign Contamination Screening (FCS) tool rapidly detects contaminants from foreign
        organisms in genome assemblies to prepare your data for submission. Therefore, the
        submission process to NCBI is faster and fewer contaminated genomes are submitted.
        This reduces errors in analyses and conclusions, not just for the original data submitter
        but for all subsequent users of the assembly.
      homepage: "https://www.ncbi.nlm.nih.gov/data-hub/cgr/data-quality-tools/"
      documentation: "https://github.com/ncbi/fcs/wiki/FCS-adaptor"
      tool_dev_url: "https://github.com/ncbi/fcs"
      doi: ""
      licence: "United States Government Work"
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - assembly:
      type: file
      description: assembly fasta file
# Output patterns mirror the globs declared in the process output block.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - cleaned_assembly:
      type: file
      description: Cleaned assembly in fasta format
      pattern: "*.cleaned_sequences.fa.gz"
  - adaptor_report:
      type: file
      description: Report of identified adaptors
      pattern: "*.fcs_adaptor_report.txt"
  - log:
      type: file
      description: Log file
      pattern: "*.fcs_adaptor.log"
  - pipeline_args:
      type: file
      description: Run arguments
      pattern: "*.pipeline_args.yaml"
  - skipped_trims:
      type: file
      description: Skipped trim information
      pattern: "*.skipped_trims.jsonl"
authors:
  - "@d4straub"

View file

@ -803,6 +803,10 @@ fasttree:
- modules/fasttree/**
- tests/modules/fasttree/**
# Associates the fcs/fcsadaptor tag with the module's source and test path globs
# (presumably used to decide which module tests to run - confirm against the CI setup).
fcs/fcsadaptor:
- modules/fcs/fcsadaptor/**
- tests/modules/fcs/fcsadaptor/**
ffq:
- modules/ffq/**
- tests/modules/ffq/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FCS_FCSADAPTOR } from '../../../../modules/fcs/fcsadaptor/main.nf'
// Test entry point: feeds one gzipped Bacteroides fragilis genome from the
// shared test data set into FCS_FCSADAPTOR.
workflow test_fcs_fcsadaptor {

    // Sample metadata map that accompanies the assembly through the module.
    def sample_meta    = [ id:'test', single_end:false ]
    // checkIfExists makes the run fail early if the test file cannot be found.
    def assembly_fasta = file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)

    FCS_FCSADAPTOR ( [ sample_meta, assembly_fasta ] )
}

View file

@ -0,0 +1,5 @@
// Publish every task's outputs under params.outdir, in a directory named after
// the lower-cased text before the first underscore of the process's own name
// (the part after the last ':'), e.g. FCS_FCSADAPTOR -> "fcs".
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

View file

@ -0,0 +1,15 @@
# Test case for the fcs/fcsadaptor module: runs the test workflow and checks
# the files published under output/fcs/.
- name: fcs fcsadaptor test_fcs_fcsadaptor
  command: nextflow run ./tests/modules/fcs/fcsadaptor -entry test_fcs_fcsadaptor -c ./tests/config/nextflow.config -c ./tests/modules/fcs/fcsadaptor/nextflow.config
  tags:
    - fcs/fcsadaptor
    - fcs
  files:
    - path: output/fcs/test.cleaned_sequences.fa.gz
      md5sum: 2f331e59b352418420a35236091bf93c
    # The log is checked by content rather than by checksum.
    - path: output/fcs/test.fcs_adaptor.log
      contains:
        - "Original command:"
        - "Executing:"
        - "INFO:cwltool:[workflow ] completed success"
    - path: output/fcs/test.fcs_adaptor_report.txt
      md5sum: 27375be4671e01d2d2674ddd1d44414a
    - path: output/fcs/test.pipeline_args.yaml
      md5sum: 80effed2a75550a6a861c47f4c30449d
    # Only the existence of this file is checked.
    - path: output/fcs/test.skipped_trims.jsonl