Merge pull request #25 from piotr-faba-ardigen/add_cutadapt_tool

Add cutadapt tool
This commit is contained in:
Alexander Peltzer 2020-03-16 18:46:00 +01:00 committed by GitHub
commit 9dee710b04
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 169 additions and 1 deletions

34
.github/workflows/cutadapt.yml vendored Normal file
View file

@ -0,0 +1,34 @@
# CI workflow for the cutadapt module: installs Nextflow and runs the module's
# paired-end and single-end test pipelines.
name: cutadapt
on:
  push: {}
  pull_request:
    # Use `**` rather than `*`: a single `*` does not match `/`, so changes made
    # only inside tools/cutadapt/test_paired/ or tools/cutadapt/test_single/
    # would not trigger this workflow.
    paths:
      - "tools/cutadapt/**"
jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository
      - uses: actions/checkout@v2

      # actions/checkout@v2 does not fetch submodules here, so pull them
      # explicitly, re-using the auth header that the checkout step configured.
      - name: Checkout submodules
        shell: bash
        run: |
          auth_header="$(git config --local --get http.https://github.com/.extraheader)"
          git submodule sync --recursive
          git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1

      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      - name: Test module with paired-end data
        run: |
          cd tools/cutadapt/test_paired/
          nextflow run . -ansi-log false

      - name: Test module with single-end data
        run: |
          cd tools/cutadapt/test_single/
          nextflow run . -ansi-log false

@ -1 +1 @@
Subproject commit e5fef88994b8d34c7bf4b07116e5f7a330d2ee3b Subproject commit aae85a5c9c72238959108212481ce83bae569709

45
tools/cutadapt/main.nf Normal file
View file

@ -0,0 +1,45 @@
// Quality-trim reads with cutadapt and drop reads that end up too short.
//
// Input : tuple (sample_id, reads) - reads is one FastQ file (single-end) or a
//         pair of FastQ files (paired-end).
// Output: tuple (sample_id, files matching "trimmed_*.fastq").
// Params: params.singleEnd            - selects the single-end or paired-end command
//         params.cutadapt_min_quality - passed to cutadapt -q
//         params.cutadapt_min_length  - passed to cutadapt --minimum-length
process cutadapt {
    tag "${sample_id}"

    container 'quay.io/biocontainers/cutadapt:1.16--py27_1'

    input:
    tuple val(sample_id), file(reads)

    output:
    // The sample id is qualified with val(), mirroring the input declaration.
    tuple val(sample_id), file("trimmed_*.fastq")

    script:
    // `def` keeps these variables local to the task instead of the shared binding.
    def forward_fq = "trimmed_1.fastq"
    def reverse_fq = "trimmed_2.fastq"
    def processing

    // NOTE(review): the container is a Python 2.7 build; cutadapt's multi-core
    // mode (-j > 1) is documented as Python 3 only - confirm before running
    // this process with more than one CPU.
    if (params.singleEnd) {
        processing = """
        cutadapt \
            -j ${task.cpus} \
            -q $params.cutadapt_min_quality \
            --minimum-length $params.cutadapt_min_length \
            --output ${forward_fq} \
            ${reads}
        """
    } else {
        // --pair-filter=any: discard the pair if either read fails the filters.
        processing = """
        cutadapt \
            -j ${task.cpus} \
            -q $params.cutadapt_min_quality \
            --minimum-length $params.cutadapt_min_length \
            --pair-filter=any \
            --output ${forward_fq} \
            --paired-output ${reverse_fq} ${reads}
        """
    }

    // Record the tool version in the task work directory (not declared as an output).
    def version = """
        cutadapt --version &> v_cutadapt.txt
        """

    return processing + version
}

36
tools/cutadapt/meta.yml Normal file
View file

@ -0,0 +1,36 @@
# Module metadata for the cutadapt process.
name: Cutadapt
description: cutadapt removes adapter sequences from high-throughput sequencing reads
keywords:
  - Quality Control
  - QC
  - Adapters
tools:
  # Keyed by the tool this module wraps.
  - cutadapt:
      description: |
        Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence
        from your high-throughput sequencing reads.
        Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3'
        sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads
        start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but
        often you don't want them to be in your reads.
      homepage: https://cutadapt.readthedocs.io/en/stable/
      documentation: https://cutadapt.readthedocs.io/en/stable/
input:
  -
    - sample_id:
        type: string
        description: Sample identifier
    - reads:
        type: file
        description: Input FastQ file, or pair of files
output:
  -
    - sample_id:
        type: string
        description: Sample identifier
    - reads:
        type: file
        description: trimmed FastQ file, or pair of files
authors:
  # Quoted because a plain YAML scalar may not start with the reserved "@" indicator.
  - "@piotr-faba-ardigen"

View file

@ -0,0 +1,14 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2

include '../main.nf' params(params)

// Input channel: FastQ files grouped into pairs by the *_{1,2}.fastq glob
ch_read_files = Channel.fromFilePairs('../../../test-datasets/tools/cutadapt/input/*_{1,2}.fastq')

// Entry point: feed the grouped reads to the cutadapt process
workflow {
    cutadapt(ch_read_files)
}

View file

@ -0,0 +1,9 @@
// Run processes inside Docker containers
docker {
    enabled = true
}

params {
    outdir = './results'

    // preprocessing options
    cutadapt_min_length = 40
    cutadapt_min_quality = 25
    singleEnd = false
}

View file

@ -0,0 +1,21 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2

include '../main.nf' params(params)

// (sample id, FastQ path) entries used as single-end test input
single_end_inputs = [
    ['SRR4238351', '../../../test-datasets/tools/cutadapt/input/SRR4238351_subsamp.fastq.gz'],
    ['SRR4238355', '../../../test-datasets/tools/cutadapt/input/SRR4238355_subsamp.fastq.gz'],
    ['SRR4238359', '../../../test-datasets/tools/cutadapt/input/SRR4238359_subsamp.fastq.gz'],
    ['SRR4238379', '../../../test-datasets/tools/cutadapt/input/SRR4238379_subsamp.fastq.gz']
]

// Emit [ sample_id, [ file ] ] items: the single read is wrapped in a
// one-element list.
ch_read_files = Channel
    .from(single_end_inputs)
    .map { sample, fastq -> [ sample, [ file(fastq) ] ] }

// Entry point: feed the reads to the cutadapt process
workflow {
    cutadapt(ch_read_files)
}

View file

@ -0,0 +1,9 @@
// Run processes inside Docker containers
docker {
    enabled = true
}

params {
    outdir = './results'

    // preprocessing options
    cutadapt_min_length = 40
    cutadapt_min_quality = 25
    singleEnd = true
}