Merge pull request #63 from drpatelh/master

Add docs and tests for all samtools commands
This commit is contained in:
Harshil Patel 2020-08-07 15:40:03 +01:00 committed by GitHub
commit bd98988f23
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
53 changed files with 2162 additions and 450 deletions

30
.github/workflows/samtools_flagstat.yml vendored Normal file
View file

@ -0,0 +1,30 @@
# CI workflow: runs the samtools flagstat module test with Nextflow using the
# Docker profile.
name: samtools_flagstat
# Run only when the module itself, this workflow file, or anything under the
# tests directory changes.
on:
  push:
    paths:
      - software/samtools/flagstat/**
      - .github/workflows/samtools_flagstat.yml
      # A bare `tests` would only match a file literally named `tests`;
      # `tests/**` matches every file below that directory.
      - tests/**
  pull_request:
    paths:
      - software/samtools/flagstat/**
      - .github/workflows/samtools_flagstat.yml
      - tests/**

jobs:
  ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI-formatted console log.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        # NXF_VER pins the Nextflow release fetched by the installer script.
        run: |
          export NXF_VER="20.07.1"
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      # Test the module
      - run: nextflow run ./software/samtools/flagstat/test/ -profile docker

30
.github/workflows/samtools_idxstats.yml vendored Normal file
View file

@ -0,0 +1,30 @@
# CI workflow: runs the samtools idxstats module test with Nextflow using the
# Docker profile.
name: samtools_idxstats
# Run only when the module itself, this workflow file, or anything under the
# tests directory changes.
on:
  push:
    paths:
      - software/samtools/idxstats/**
      - .github/workflows/samtools_idxstats.yml
      # A bare `tests` would only match a file literally named `tests`;
      # `tests/**` matches every file below that directory.
      - tests/**
  pull_request:
    paths:
      - software/samtools/idxstats/**
      - .github/workflows/samtools_idxstats.yml
      - tests/**

jobs:
  ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI-formatted console log.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        # NXF_VER pins the Nextflow release fetched by the installer script.
        run: |
          export NXF_VER="20.07.1"
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      # Test the module
      - run: nextflow run ./software/samtools/idxstats/test/ -profile docker

30
.github/workflows/samtools_index.yml vendored Normal file
View file

@ -0,0 +1,30 @@
# CI workflow: runs the samtools index module test with Nextflow using the
# Docker profile.
name: samtools_index
# Run only when the module itself, this workflow file, or anything under the
# tests directory changes.
on:
  push:
    paths:
      - software/samtools/index/**
      - .github/workflows/samtools_index.yml
      # A bare `tests` would only match a file literally named `tests`;
      # `tests/**` matches every file below that directory.
      - tests/**
  pull_request:
    paths:
      - software/samtools/index/**
      - .github/workflows/samtools_index.yml
      - tests/**

jobs:
  ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI-formatted console log.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        # NXF_VER pins the Nextflow release fetched by the installer script.
        run: |
          export NXF_VER="20.07.1"
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      # Test the module
      - run: nextflow run ./software/samtools/index/test/ -profile docker

30
.github/workflows/samtools_sort.yml vendored Normal file
View file

@ -0,0 +1,30 @@
# CI workflow: runs the samtools sort module test with Nextflow using the
# Docker profile.
name: samtools_sort
# Run only when the module itself, this workflow file, or anything under the
# tests directory changes.
on:
  push:
    paths:
      - software/samtools/sort/**
      - .github/workflows/samtools_sort.yml
      # A bare `tests` would only match a file literally named `tests`;
      # `tests/**` matches every file below that directory.
      - tests/**
  pull_request:
    paths:
      - software/samtools/sort/**
      - .github/workflows/samtools_sort.yml
      - tests/**

jobs:
  ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI-formatted console log.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        # NXF_VER pins the Nextflow release fetched by the installer script.
        run: |
          export NXF_VER="20.07.1"
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      # Test the module
      - run: nextflow run ./software/samtools/sort/test/ -profile docker

30
.github/workflows/samtools_stats.yml vendored Normal file
View file

@ -0,0 +1,30 @@
# CI workflow: runs the samtools stats module test with Nextflow using the
# Docker profile.
name: samtools_stats
# Run only when the module itself, this workflow file, or anything under the
# tests directory changes.
on:
  push:
    paths:
      - software/samtools/stats/**
      - .github/workflows/samtools_stats.yml
      # A bare `tests` would only match a file literally named `tests`;
      # `tests/**` matches every file below that directory.
      - tests/**
  pull_request:
    paths:
      - software/samtools/stats/**
      - .github/workflows/samtools_stats.yml
      - tests/**

jobs:
  ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI-formatted console log.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        # NXF_VER pins the Nextflow release fetched by the installer script.
        run: |
          export NXF_VER="20.07.1"
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      # Test the module
      - run: nextflow run ./software/samtools/stats/test/ -profile docker

View file

@ -4,13 +4,14 @@ name: software_tool
## TODO nf-core: Add a description and keywords
description: Run FastQC on sequenced reads
keywords:
- Quality Control
- QC
- Adapters
- quality control
- qc
- adapters
- fastq
tools:
## TODO nf-core: Change the name of "software_tool" below
- software_tool:
## TODO nf-core: Add a description and other details for the tool below
## TODO nf-core: Change the name of "software" below
- software:
## TODO nf-core: Add a description and other details for the software below
description: |
FastQC gives general quality metrics about your reads.
It provides information about the quality score distribution
@ -63,15 +64,15 @@ output:
- html:
type: file
description: FastQC report
pattern: "*_fastqc.html"
pattern: "*_{fastqc.html}"
- zip:
type: file
description: FastQC report archive
pattern: "*_fastqc.zip"
pattern: "*_{fastqc.zip}"
- version:
type: file
description: File containing software version
pattern: "*.version.txt"
pattern: "*.{version.txt}"
## TODO nf-core: Add your GitHub username below
authors:
- "@your_github_username"

View file

@ -4,6 +4,7 @@ keywords:
- index
- fasta
- genome
- reference
tools:
- bwa:
description: |
@ -31,8 +32,7 @@ params:
input:
- fasta:
type: file
description: |
Input genome fasta file
description: Input genome fasta file
- options:
type: map
description: |
@ -42,11 +42,11 @@ output:
- index:
type: file
description: BWA genome index files
pattern: "*.{fasta}.{amb,ann,bwt,pac,sa}"
pattern: "*.{amb,ann,bwt,pac,sa}"
- version:
type: file
description: File containing software version
pattern: "*.version.txt"
pattern: "*.{version.txt}"
authors:
- "@drpatelh"
- "@maxulysse"

View file

@ -5,6 +5,9 @@ keywords:
- bwa
- alignment
- map
- fastq
- bam
- sam
tools:
- bwa:
description: |
@ -42,13 +45,11 @@ input:
respectively.
- index:
type: file
description: |
BWA genome index files
description: BWA genome index files
pattern: "*.{amb,ann,bwt,pac,sa}"
- fasta:
type: file
description: |
Input genome fasta file
description: Input genome fasta file
- options:
type: map
description: |
@ -58,11 +59,11 @@ output:
- bam:
type: file
description: Output BAM file containing read alignments
pattern: "*.bam"
pattern: "*.{bam}"
- version:
type: file
description: File containing software version
pattern: "*.version.txt"
pattern: "*.{version.txt}"
authors:
- "@drpatelh"
- "@jeremy1805"

View file

@ -4,6 +4,7 @@ keywords:
- quality control
- qc
- adapters
- fastq
tools:
- fastqc:
description: |
@ -55,15 +56,15 @@ output:
- html:
type: file
description: FastQC report
pattern: "*_fastqc.html"
pattern: "*_{fastqc.html}"
- zip:
type: file
description: FastQC report archive
pattern: "*_fastqc.zip"
pattern: "*_{fastqc.zip}"
- version:
type: file
description: File containing software version
pattern: "*.version.txt"
pattern: "*.{version.txt}"
authors:
- "@drpatelh"
- "@grst"

View file

@ -0,0 +1,69 @@
# Module metadata for samtools flagstat: the wrapped tool, the pipeline
# parameters the module reads, and its inputs and outputs.
name: samtools_flagstat
description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type
keywords:
  - stats
  - mapping
  - counts
  - bam
  - sam
  - cram
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - bai:
      type: file
      description: Index for BAM/CRAM/SAM file
      # samtools index files use the .bai, .crai or .csi extension.
      pattern: "*.{bai,crai,csi}"
  - options:
      type: map
      description: |
        Groovy Map containing module options for passing command-line arguments and
        output file paths.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - flagstat:
      type: file
      description: File containing samtools flagstat output
      pattern: "*.{flagstat}"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@drpatelh"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam.bai

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow

// Standalone DSL2 test script for the SAMTOOLS_FLAGSTAT module.
nextflow.enable.dsl = 2

include { SAMTOOLS_FLAGSTAT } from '../main.nf'

// Assembles a single [ meta, bam, bai ] list from the files under ./input
// and passes it to the module together with an empty options map.
workflow test {
    def meta = [ id:'test', single_end:false ] // meta map
    def bam  = file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true)
    def bai  = file("${baseDir}/input/test.paired_end.sorted.bam.bai", checkIfExists: true)
    def input = [ meta, bam, bai ]

    SAMTOOLS_FLAGSTAT ( input, [:] )
}

// Default entry point: run the named test workflow.
workflow {
    test()
}

View file

@ -0,0 +1,20 @@
// Default parameter values used by the module test.
params {
    outdir           = "output/"
    publish_dir_mode = "copy"
    conda            = false
}

// Execution profiles, selected on the command line with `-profile <name>`.
profiles {
    conda {
        params.conda = true
    }
    docker {
        docker.enabled    = true
        // Hands the calling user's uid:gid to Docker's `-u` option.
        docker.runOptions = '-u \$(id -u):\$(id -g)'
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
}

View file

@ -0,0 +1,13 @@
20000 + 0 in total (QC-passed reads + QC-failed reads)
0 + 0 secondary
0 + 0 supplementary
0 + 0 duplicates
20000 + 0 mapped (100.00% : N/A)
20000 + 0 paired in sequencing
10000 + 0 read1
10000 + 0 read2
20000 + 0 properly paired (100.00% : N/A)
20000 + 0 with itself and mate mapped
0 + 0 singletons (0.00% : N/A)
0 + 0 with mate mapped to a different chr
0 + 0 with mate mapped to a different chr (mapQ>=5)

View file

@ -0,0 +1,70 @@
# Module metadata for samtools idxstats: the wrapped tool, the pipeline
# parameters the module reads, and its inputs and outputs.
name: samtools_idxstats
description: Reports alignment summary statistics for a BAM/CRAM/SAM file
keywords:
  - stats
  - mapping
  - counts
  - chromosome
  - bam
  - sam
  - cram
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - bai:
      type: file
      description: Index for BAM/CRAM/SAM file
      # samtools index files use the .bai, .crai or .csi extension.
      pattern: "*.{bai,crai,csi}"
  - options:
      type: map
      description: |
        Groovy Map containing module options for passing command-line arguments and
        output file paths.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - idxstats:
      type: file
      description: File containing samtools idxstats output
      pattern: "*.{idxstats}"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@drpatelh"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam.bai

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow

// Standalone DSL2 test script for the SAMTOOLS_IDXSTATS module.
nextflow.enable.dsl = 2

include { SAMTOOLS_IDXSTATS } from '../main.nf'

// Assembles a single [ meta, bam, bai ] list from the files under ./input
// and passes it to the module together with an empty options map.
workflow test {
    def meta = [ id:'test', single_end:false ] // meta map
    def bam  = file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true)
    def bai  = file("${baseDir}/input/test.paired_end.sorted.bam.bai", checkIfExists: true)
    def input = [ meta, bam, bai ]

    SAMTOOLS_IDXSTATS ( input, [:] )
}

// Default entry point: run the named test workflow.
workflow {
    test()
}

View file

@ -0,0 +1,20 @@
// Default parameter values used by the module test.
params {
    outdir           = "output/"
    publish_dir_mode = "copy"
    conda            = false
}

// Execution profiles, selected on the command line with `-profile <name>`.
profiles {
    conda {
        params.conda = true
    }
    docker {
        docker.enabled    = true
        // Hands the calling user's uid:gid to Docker's `-u` option.
        docker.runOptions = '-u \$(id -u):\$(id -g)'
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
}

View file

@ -0,0 +1,2 @@
gi|170079663|ref|NC_010473.1| 4686137 20000 0
* 0 0 0

View file

@ -1,7 +1,10 @@
name: samtools index
description: index a BAM or CRAM file
name: samtools_index
description: Index SAM/BAM/CRAM file
keywords:
- index
- bam
- sam
- cram
tools:
- samtools:
description: |
@ -11,17 +14,51 @@ tools:
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
params:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
input:
-
- input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Input BAM or CRAM file
pattern: "*.{bam,cram}"
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- options:
type: map
description: |
Groovy Map containing module options for passing command-line arguments and
output file paths.
output:
-
- index:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bai:
type: file
description: BAM or CRAM index file
pattern: "*.{bai}"
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,csi}"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@drpatelh"
- "@ewels"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

23
software/samtools/index/test/main.nf Normal file → Executable file
View file

@ -1,13 +1,18 @@
#!/usr/bin/env nextflow
echo true
cheers = Channel.from 'Bonjour', 'Ciao', 'Hello', 'Hola'
nextflow.enable.dsl = 2
process sayHello {
input:
val x from cheers
script:
"""
echo '$x world!'
"""
include { SAMTOOLS_INDEX } from '../main.nf'
workflow test {
def input = []
input = [ [ id:'test', single_end:false ], // meta map
file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true) ]
SAMTOOLS_INDEX ( input, [:] )
}
workflow {
test()
}

View file

@ -1,2 +1,20 @@
params {
outdir = "output/"
publish_dir_mode = "copy"
conda = false
}
profiles {
conda {
params.conda = true
}
docker {
docker.enabled = true
params.outdir = './results'
docker.runOptions = '-u \$(id -u):\$(id -g)'
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
}
}

View file

@ -1,7 +1,10 @@
name: samtools sort
description: Sort a BAM or CRAM file
name: samtools_sort
description: Sort SAM/BAM/CRAM file
keywords:
- sort
- bam
- sam
- cram
tools:
- samtools:
description: |
@ -11,17 +14,51 @@ tools:
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
params:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
input:
-
- input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Input BAM or CRAM file
pattern: "*.{bam,cram}"
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- options:
type: map
description: |
Groovy Map containing module options for passing command-line arguments and
output file paths.
output:
-
- sorted_file:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Sorted BAM or CRAM file
pattern: "*.{bam,cram}"
description: Sorted BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@drpatelh"
- "@ewels"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.name.sorted.bam

23
software/samtools/sort/test/main.nf Normal file → Executable file
View file

@ -1,13 +1,18 @@
#!/usr/bin/env nextflow
echo true
cheers = Channel.from 'Bonjour', 'Ciao', 'Hello', 'Hola'
nextflow.enable.dsl = 2
process sayHello {
input:
val x from cheers
script:
"""
echo '$x world!'
"""
include { SAMTOOLS_SORT } from '../main.nf'
workflow test {
def input = []
input = [ [ id:'test', single_end:false ], // meta map
file("${baseDir}/input/test.paired_end.name.sorted.bam", checkIfExists: true) ]
SAMTOOLS_SORT ( input, [:] )
}
workflow {
test()
}

View file

@ -1,2 +1,20 @@
params {
outdir = "output/"
publish_dir_mode = "copy"
conda = false
}
profiles {
conda {
params.conda = true
}
docker {
docker.enabled = true
params.outdir = './results'
docker.runOptions = '-u \$(id -u):\$(id -g)'
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
}
}

Binary file not shown.

View file

@ -0,0 +1,68 @@
# Module metadata for samtools stats: the wrapped tool, the pipeline
# parameters the module reads, and its inputs and outputs.
name: samtools_stats
description: Produces comprehensive statistics from SAM/BAM/CRAM file
keywords:
  - statistics
  - counts
  - bam
  - sam
  - cram
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - bai:
      type: file
      description: Index for BAM/CRAM/SAM file
      # samtools index files use the .bai, .crai or .csi extension.
      pattern: "*.{bai,crai,csi}"
  - options:
      type: map
      description: |
        Groovy Map containing module options for passing command-line arguments and
        output file paths.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - stats:
      type: file
      description: File containing samtools stats output
      pattern: "*.{stats}"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@drpatelh"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam.bai

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow

// Standalone DSL2 test script for the SAMTOOLS_STATS module.
nextflow.enable.dsl = 2

include { SAMTOOLS_STATS } from '../main.nf'

// Assembles a single [ meta, bam, bai ] list from the files under ./input
// and passes it to the module together with an empty options map.
workflow test {
    def meta = [ id:'test', single_end:false ] // meta map
    def bam  = file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true)
    def bai  = file("${baseDir}/input/test.paired_end.sorted.bam.bai", checkIfExists: true)
    def input = [ meta, bam, bai ]

    SAMTOOLS_STATS ( input, [:] )
}

// Default entry point: run the named test workflow.
workflow {
    test()
}

View file

@ -0,0 +1,20 @@
// Default parameter values used by the module test.
params {
    outdir           = "output/"
    publish_dir_mode = "copy"
    conda            = false
}

// Execution profiles, selected on the command line with `-profile <name>`.
profiles {
    conda {
        params.conda = true
    }
    docker {
        docker.enabled    = true
        // Hands the calling user's uid:gid to Docker's `-u` option.
        docker.runOptions = '-u \$(id -u):\$(id -g)'
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
}

File diff suppressed because it is too large Load diff

View file

@ -4,6 +4,7 @@ keywords:
- trimming
- adapters
- sequencing adapters
- fastq
tools:
- trimgalore:
description: |
@ -75,23 +76,23 @@ output:
description: |
List of input adapter trimmed FastQ files of size 1 and 2 for
single-end and paired-end data, respectively.
pattern: "*.fq.gz"
pattern: "*.{fq.gz}"
- html:
type: file
description: FastQC report (optional)
pattern: "*_fastqc.html"
pattern: "*_{fastqc.html}"
- zip:
type: file
description: FastQC report archive (optional)
pattern: "*_fastqc.zip"
pattern: "*_{fastqc.zip}"
- log:
type: file
description: Trim Galore! trimming report
pattern: "*report.txt"
pattern: "*_{report.txt}"
- version:
type: file
description: File containing software version
pattern: "*.version.txt"
pattern: "*.{version.txt}"
authors:
- "@drpatelh"
- "@ewels"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz Normal file → Executable file

Binary file not shown.

BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz Normal file → Executable file

Binary file not shown.

View file

@ -1,14 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
cheers = Channel.from 'Bonjour', 'Ciao', 'Hello', 'Hola'
process check_output {
input:
val x from cheers
script:
"""
echo '$x world!'
"""
}