Merge pull request #9 from ewels/master

Added samtools, restructured directory structures
This commit is contained in:
Maxime Garcia 2019-12-06 10:37:42 +01:00 committed by GitHub
commit 9fd356468d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 401 additions and 34 deletions

23
.github/workflows/fastqc.yml vendored Normal file
View file

@ -0,0 +1,23 @@
# CI workflow: runs the FastQC module test pipeline with Nextflow.
name: FastQC
on:
  push: {}
  pull_request:
    # `**` also matches files in nested directories (e.g. test/main.nf);
    # a single `*` does not cross a `/`.
    paths:
      - 'tools/fastqc/**'
jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository, including git submodules.
      # Action inputs must be nested under `with:`; `submodules` is not a
      # valid step-level key.
      - uses: actions/checkout@v1
        with:
          submodules: true
      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      # Test the module
      - run: nextflow run ./tools/fastqc/test/ -ansi-log false

23
.github/workflows/samtools_index.yml vendored Normal file
View file

@ -0,0 +1,23 @@
# CI workflow: runs the samtools index module test pipeline with Nextflow.
name: samtools index
on:
  push: {}
  pull_request:
    # `index*` only matched entries directly inside tools/samtools/ because
    # `*` does not cross a `/`; `index/**` matches every file of the module.
    paths:
      - 'tools/samtools/index/**'
jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository, including git submodules.
      # Action inputs must be nested under `with:`; `submodules` is not a
      # valid step-level key.
      - uses: actions/checkout@v1
        with:
          submodules: true
      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      # Test the module
      - run: nextflow run ./tools/samtools/index/test/ -ansi-log false

23
.github/workflows/samtools_sort.yml vendored Normal file
View file

@ -0,0 +1,23 @@
# CI workflow: runs the samtools sort module test pipeline with Nextflow.
name: samtools sort
on:
  push: {}
  pull_request:
    # `sort*` only matched entries directly inside tools/samtools/ because
    # `*` does not cross a `/`; `sort/**` matches every file of the module.
    paths:
      - 'tools/samtools/sort/**'
jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository, including git submodules.
      # Action inputs must be nested under `with:`; `submodules` is not a
      # valid step-level key.
      - uses: actions/checkout@v1
        with:
          submodules: true
      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      # Test the module
      - run: nextflow run ./tools/samtools/sort/test/ -ansi-log false

23
.github/workflows/trim_galore.yml vendored Normal file
View file

@ -0,0 +1,23 @@
# CI workflow: runs the Trim Galore! module test pipeline with Nextflow.
name: Trim Galore!
on:
  push: {}
  pull_request:
    # `**` also matches files in nested directories (e.g. test/main.nf);
    # a single `*` does not cross a `/`.
    paths:
      - 'tools/trim_galore/**'
jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository, including git submodules.
      # Action inputs must be nested under `with:`; `submodules` is not a
      # valid step-level key.
      - uses: actions/checkout@v1
        with:
          submodules: true
      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      # Test the module
      - run: nextflow run ./tools/trim_galore/test/ -ansi-log false

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
# Registers the nf-core/test-datasets repository as a git submodule checked out
# at ./test-datasets.
[submodule "test-datasets"]
path = test-datasets
url = https://github.com/nf-core/test-datasets.git

View file

@ -0,0 +1,14 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2

// Sample greetings. Not bound to the process below: under DSL2 a process
// receives its inputs as arguments when it is invoked from a workflow.
cheers = Channel.from 'Bonjour', 'Ciao', 'Hello', 'Hola'

/*
 * Placeholder process: runs `echo '<value> world!'` for every value received.
 *
 * Input:
 *   x - value interpolated into the echoed message
 */
process check_output {
    input:
    // No `from <channel>` clause: DSL2 does not allow it in input declarations.
    val x

    script:
    """
    echo '$x world!'
    """
}

View file

@ -1,34 +0,0 @@
/*
* Description:
* Run FastQC on sequenced reads
* Keywords:
* read qc
* adapter
* Tools:
* FastQC:
* homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
* documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
* description: FastQC gives general quality metrics about your reads.
* It provides information about the quality score distribution
* across your reads, the per base sequence content (%A/C/G/T).
* You get information about adapter contamination and other
* overrepresented sequences.
*/
// Runs FastQC on the reads of one sample and publishes the resulting reports.
process fastqc {
    tag "$sample_id"

    // Results are copied to <outdir>/fastqc; files whose name contains ".zip"
    // (not at position 0) are placed in a "zips" sub-directory.
    publishDir "${params.outdir}/fastqc",
        mode: 'copy',
        saveAs: { filename ->
            if (filename.indexOf(".zip") > 0) {
                return "zips/$filename"
            }
            return "$filename"
        }

    input:
    set val(sample_id), file(reads)

    output:
    file "*_fastqc.{zip,html}"

    script:
    """
    fastqc -q $reads
    fastqc --version &> fastqc.version.txt
    """
}

1
test-datasets Submodule

@ -0,0 +1 @@
Subproject commit e5fef88994b8d34c7bf4b07116e5f7a330d2ee3b

20
tools/fastqc/main.nf Normal file
View file

@ -0,0 +1,20 @@
/*
 * Run FastQC on the reads of one sample.
 *
 * Input:
 *   tuple of (sample_id, reads) - sample identifier and its read file(s)
 * Output:
 *   files matching *_fastqc.{zip,html}
 */
process fastqc {
    tag "FastQC - $sample_id"

    // Results are copied to <outdir>/fastqc; files whose name contains ".zip"
    // (not at position 0) are placed in a "zips" sub-directory.
    publishDir "${params.outdir}/fastqc", mode: 'copy',
        saveAs: { filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" }

    container 'quay.io/biocontainers/fastqc:0.11.8--2'

    input:
    // Every tuple element carries an explicit qualifier; the unqualified
    // `tuple sample_id, ...` shorthand is deprecated.
    tuple val(sample_id), path(reads)

    output:
    path "*_fastqc.{zip,html}"

    script:
    """
    fastqc -q $reads
    fastqc --version &> fastqc.version.txt
    """
}

32
tools/fastqc/meta.yml Normal file
View file

@ -0,0 +1,32 @@
# Module metadata for the FastQC process.
name: FastQC
description: Run FastQC on sequenced reads
keywords:
  - Quality Control
  - QC
  - Adapters
tools:
  - fastqc:
      description: |
        FastQC gives general quality metrics about your reads.
        It provides information about the quality score distribution
        across your reads, the per base sequence content (%A/C/G/T).
        You get information about adapter contamination and other
        overrepresented sequences.
      homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
      documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
input:
  -
    - sample_id:
        type: string
        description: Sample identifier
    - reads:
        type: file
        description: Input FastQ file, or pair of files
output:
  -
    - report:
        type: file
        description: FastQC report
        # Quoted: a plain scalar starting with `*` is parsed as a YAML alias.
        pattern: '*_fastqc.{zip,html}'
authors:
  # Quoted: `@` is a reserved indicator and cannot start a plain scalar.
  - '@ewels'

22
tools/fastqc/test/main.nf Normal file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2

include '../../../nf-core/module_testing/check_process_outputs.nf' params(params)
include '../main.nf' params(params)

// Define input channels.
// Paths are anchored to this script's directory (baseDir) rather than the
// launch directory, so the test finds its data when started from elsewhere,
// e.g. `nextflow run ./tools/fastqc/test/` from the repository root.
test_data_dir = "${baseDir}/../../../test-datasets/tools/fastqc/input"
readPaths = [
    ['SRR4238351', ["${test_data_dir}/SRR4238351_subsamp.fastq.gz"]],
    ['SRR4238355', ["${test_data_dir}/SRR4238355_subsamp.fastq.gz"]],
    ['SRR4238359', ["${test_data_dir}/SRR4238359_subsamp.fastq.gz"]],
    ['SRR4238379', ["${test_data_dir}/SRR4238379_subsamp.fastq.gz"]]
]

// Emit one [sample_id, [read_file]] item per entry of readPaths.
Channel
    .from(readPaths)
    .map { row -> [ row[0], [ file(row[1][0]) ] ] }
    .set { ch_read_files }

// Run the workflow
workflow {
    fastqc(ch_read_files)
    // .check_output()
}

View file

@ -0,0 +1,2 @@
// Test configuration: enable Docker and set the output directory parameter.
docker {
    enabled = true
}

params {
    outdir = './results'
}

View file

@ -0,0 +1,22 @@
/*
 * Sort an alignment file with `samtools sort`, writing <baseName>.sorted.bam.
 *
 * NOTE(review): the process is named `samtools_index` although it runs
 * `samtools sort`; the name is left unchanged here so existing includes keep
 * working - confirm the intended name.
 *
 * Input:
 *   bam - alignment file to sort
 * Output:
 *   files matching *.sorted.bam
 */
process samtools_index {
    tag "${bam.baseName}"
    container 'quay.io/biocontainers/samtools:1.9--h10a08f8_12'

    input:
    path(bam)

    output:
    path "*.sorted.bam"

    script:
    // Per-thread sort memory: task memory minus a 6 GB reserve, split evenly
    // across the threads. `-m` is only passed when task.memory is configured
    // and the per-thread share exceeds 2 GB; otherwise samtools uses its default.
    def avail_mem = ''
    if (task.memory) {
        // Integer division keeps the value a whole number of bytes.
        def mem_per_thread = (task.memory.toBytes() - 6000000000).intdiv(task.cpus)
        if (mem_per_thread > 2000000000) {
            avail_mem = "-m ${mem_per_thread}"
        }
    }
    """
    samtools sort $bam \\
        -@ ${task.cpus} ${avail_mem} \\
        -o ${bam.baseName}.sorted.bam
    samtools --version &> v_samtools.txt
    """
}

View file

@ -0,0 +1,27 @@
# Module metadata for the samtools index process.
# Name, description and keyword describe indexing, matching the declared
# output (an index file).
name: samtools index
description: Index a BAM or CRAM file
keywords:
  - index
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
input:
  -
    - input:
        type: file
        description: Input BAM or CRAM file
        # Quoted: a plain scalar starting with `*` is parsed as a YAML alias.
        pattern: '*.{bam,cram}'
output:
  -
    - index:
        type: file
        description: BAM or CRAM index file
        pattern: '*.{bai}'
authors:
  # Quoted: `@` is a reserved indicator and cannot start a plain scalar.
  - '@ewels'

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
// Placeholder test pipeline: runs `echo '<greeting> world!'` once per greeting.
echo true

cheers = Channel.from('Bonjour', 'Ciao', 'Hello', 'Hola')

process sayHello {
    input:
    val x from cheers

    script:
    """
    echo '$x world!'
    """
}

View file

@ -0,0 +1,2 @@
// Test configuration: enable Docker and set the output directory parameter.
docker {
    enabled = true
}

params {
    outdir = './results'
}

View file

@ -0,0 +1,18 @@
// Indexes an alignment file with `samtools index` and emits the *.bam.bai file.
process samtools_index {
    tag "${bam.baseName}"

    container 'quay.io/biocontainers/samtools:1.9--h10a08f8_12'

    input:
    path bam

    output:
    path '*.bam.bai'

    script:
    """
    samtools index $bam
    samtools --version &> v_samtools.txt
    """
}

View file

@ -0,0 +1,27 @@
# Module metadata for the samtools sort process.
name: samtools sort
description: Sort a BAM or CRAM file
keywords:
  - sort
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
input:
  -
    - input:
        type: file
        description: Input BAM or CRAM file
        # Quoted: a plain scalar starting with `*` is parsed as a YAML alias.
        pattern: '*.{bam,cram}'
output:
  -
    - sorted_file:
        type: file
        description: Sorted BAM or CRAM file
        pattern: '*.{bam,cram}'
authors:
  # Quoted: `@` is a reserved indicator and cannot start a plain scalar.
  - '@ewels'

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
// Placeholder test pipeline: runs `echo '<greeting> world!'` once per greeting.
echo true

cheers = Channel.from('Bonjour', 'Ciao', 'Hello', 'Hola')

process sayHello {
    input:
    val x from cheers

    script:
    """
    echo '$x world!'
    """
}

View file

@ -0,0 +1,2 @@
// Test configuration: enable Docker and set the output directory parameter.
docker {
    enabled = true
}

params {
    outdir = './results'
}

35
tools/trim_galore/main.nf Normal file
View file

@ -0,0 +1,35 @@
/*
 * Trim reads with Trim Galore! and run FastQC on the trimmed output.
 *
 * NOTE(review): the process is named `fastqc` although it runs `trim_galore`;
 * the name is left unchanged here so existing includes keep working - confirm
 * the intended name.
 *
 * Input:
 *   tuple of (sample_id, reads) - sample identifier and its read file(s)
 * Output:
 *   tuple of (sample_id, *fq.gz) - trimmed reads
 *   *trimming_report.txt         - trimming report(s)
 *   *_fastqc.{zip,html}          - FastQC reports
 * Parameters read:
 *   params.clip_r1, params.clip_r2, params.three_prime_clip_r1,
 *   params.three_prime_clip_r2, params.trim_nextseq, params.singleEnd
 */
process fastqc {
    tag "$sample_id"

    // Results are copied to <outdir>/trim_galore; FastQC files go to FastQC/
    // and trimming reports to logs/.
    publishDir "${params.outdir}/trim_galore", mode: 'copy',
        saveAs: { filename ->
            if (filename.indexOf("_fastqc") > 0) "FastQC/$filename"
            else if (filename.indexOf("trimming_report.txt") > 0) "logs/$filename"
            else filename
        }

    container 'quay.io/biocontainers/trim-galore:0.6.5--0'

    input:
    tuple val(sample_id), path(reads)

    output:
    // Re-emit the input sample identifier (the original referenced an
    // undefined variable `name`).
    tuple val(sample_id), path("*fq.gz")
    path "*trimming_report.txt"
    path "*_fastqc.{zip,html}"

    script:
    // Clipping options are read from params, like trim_nextseq and singleEnd;
    // the bare variable names are not defined inside this module. An option is
    // only added when its value is greater than zero.
    def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : ''
    def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : ''
    def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : ''
    def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : ''
    def nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : ''
    if (params.singleEnd) {
        """
        trim_galore --fastqc --gzip $c_r1 $tpc_r1 $nextseq $reads
        """
    } else {
        """
        trim_galore --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 $nextseq $reads
        """
    }
}

View file

@ -0,0 +1,43 @@
# Module metadata for the Trim Galore! process.
name: Trim Galore!
description: Trim FastQ files using Trim Galore!
keywords:
  - trimming
  - adapters
  - sequencing adapters
tools:
  # Keyed by the tool that the description and links below refer to.
  - trim_galore:
      description: |
        A wrapper tool around Cutadapt and FastQC to consistently apply quality
        and adapter trimming to FastQ files, with some extra functionality for
        MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries.
      homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
      documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
input:
  -
    - sample_id:
        type: string
        description: Sample identifier
    - reads:
        type: file
        description: Input FastQ file, or pair of files
output:
  -
    - sample_id:
        type: string
        description: Sample identifier
    - trimmed_fastq:
        type: file
        description: Trimmed FastQ files
        # Quoted: a plain scalar starting with `*` is parsed as a YAML alias.
        pattern: '*fq.gz'
  -
    - report:
        type: file
        description: Trim Galore! trimming report
        pattern: '*trimming_report.txt'
  -
    - fastqc_report:
        type: file
        description: FastQC report
        pattern: '*_fastqc.{zip,html}'
authors:
  # Quoted: `@` is a reserved indicator and cannot start a plain scalar.
  - '@ewels'

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
// Placeholder test pipeline: runs `echo '<greeting> world!'` once per greeting.
echo true

cheers = Channel.from('Bonjour', 'Ciao', 'Hello', 'Hola')

process sayHello {
    input:
    val x from cheers

    script:
    """
    echo '$x world!'
    """
}