Merge pull request #64 from drpatelh/master

Add tests and docs for picard tools
This commit is contained in:
Harshil Patel 2020-08-07 16:46:59 +01:00 committed by GitHub
commit 47b6dd70e4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
31 changed files with 1164 additions and 80 deletions

View file

@ -1,35 +0,0 @@
# CI workflow for the cutadapt module: installs Nextflow and runs the module's
# test pipeline once with paired-end data and once with single-end data.
name: cutadapt
# Trigger only when the module, this workflow file, or anything under the
# repository-level tests directory changes.
on:
  push:
    paths:
      - software/cutadapt/**
      - .github/workflows/cutadapt.yml
      # A bare `tests` only matches a path literally named "tests"; the glob
      # is needed to match files inside the directory.
      - tests/**
  pull_request:
    paths:
      - software/cutadapt/**
      - .github/workflows/cutadapt.yml
      - tests/**
jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI console rendering so CI logs stay plain text.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      - name: Test module with paired-end data
        run: |
          cd software/cutadapt/test/
          nextflow run .
      - name: Test module with single-end data
        run: |
          cd software/cutadapt/test/
          nextflow run . --single_end

View file

@ -1,31 +0,0 @@
# CI workflow for the tcoffee module: installs Nextflow and runs the module's
# test pipeline.
name: tcoffee
# Trigger only when the module, this workflow file, or anything under the
# repository-level tests directory changes.
on:
  push:
    paths:
      - software/tcoffee/**
      - .github/workflows/tcoffee.yml
      # A bare `tests` only matches a path literally named "tests"; the glob
      # is needed to match files inside the directory.
      - tests/**
  pull_request:
    paths:
      - software/tcoffee/**
      - .github/workflows/tcoffee.yml
      - tests/**
jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI console rendering so CI logs stay plain text.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      # Test the module
      - run: |
          cd software/tcoffee/test/
          nextflow run .

View file

@ -0,0 +1,30 @@
# CI workflow for the picard/collectmultiplemetrics module: installs a pinned
# Nextflow release and runs the module's test pipeline with the docker profile.
# The workflow name carries no file extension, matching the sibling picard
# workflows.
name: picard_collectmultiplemetrics
# Trigger only when the module, this workflow file, or anything under the
# repository-level tests directory changes.
on:
  push:
    paths:
      - software/picard/collectmultiplemetrics/**
      - .github/workflows/picard_collectmultiplemetrics.yml
      # A bare `tests` only matches a path literally named "tests"; the glob
      # is needed to match files inside the directory.
      - tests/**
  pull_request:
    paths:
      - software/picard/collectmultiplemetrics/**
      - .github/workflows/picard_collectmultiplemetrics.yml
      - tests/**
jobs:
  ci_test:
    runs-on: ubuntu-latest
    env:
      # Turn off Nextflow's ANSI console rendering so CI logs stay plain text.
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        run: |
          # Pin the Nextflow version before running the installer.
          export NXF_VER="20.07.1"
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      # Test the module
      - run: nextflow run ./software/picard/collectmultiplemetrics/test/ -profile docker

View file

@ -1,18 +1,18 @@
name: samtools sort name: picard_markduplicates
on: on:
push: push:
paths: paths:
- software/samtools/sort** - software/picard/markduplicates/**
- .github/workflows/samtools_sort.yml - .github/workflows/picard_markduplicates.yml
- tests - tests
pull_request: pull_request:
paths: paths:
- software/samtools/sort** - software/picard/markduplicates/**
- .github/workflows/samtools_sort.yml - .github/workflows/picard_markduplicates.yml
- tests - tests
jobs: jobs:
run_ci_test: ci_test:
runs-on: ubuntu-latest runs-on: ubuntu-latest
env: env:
NXF_ANSI_LOG: false NXF_ANSI_LOG: false
@ -22,8 +22,9 @@ jobs:
- name: Install Nextflow - name: Install Nextflow
run: | run: |
export NXF_VER="20.07.1"
wget -qO- get.nextflow.io | bash wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/ sudo mv nextflow /usr/local/bin/
# Test the module # Test the module
- run: nextflow run ./software/samtools/sort/test/ - run: nextflow run ./software/picard/markduplicates/test/ -profile docker

View file

@ -1,18 +1,18 @@
name: samtools index name: picard_mergesamfiles
on: on:
push: push:
paths: paths:
- software/samtools/index/** - software/picard/mergesamfiles/**
- .github/workflows/samtools_index.yml - .github/workflows/picard_mergesamfiles.yml
- tests - tests
pull_request: pull_request:
paths: paths:
- software/samtools/index/** - software/picard/mergesamfiles/**
- .github/workflows/samtools_index.yml - .github/workflows/picard_mergesamfiles.yml
- tests - tests
jobs: jobs:
run_ci_test: ci_test:
runs-on: ubuntu-latest runs-on: ubuntu-latest
env: env:
NXF_ANSI_LOG: false NXF_ANSI_LOG: false
@ -22,8 +22,9 @@ jobs:
- name: Install Nextflow - name: Install Nextflow
run: | run: |
export NXF_VER="20.07.1"
wget -qO- get.nextflow.io | bash wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/ sudo mv nextflow /usr/local/bin/
# Test the module # Test the module
- run: nextflow run ./software/samtools/index/test/ - run: nextflow run ./software/picard/mergesamfiles/test/ -profile docker

View file

@ -0,0 +1,70 @@
# Module metadata for picard_collectmultiplemetrics: describes the wrapped
# tool and the module's parameters, input channels and output channels.
name: picard_collectmultiplemetrics
description: Collect multiple metrics from a BAM file
keywords:
  - alignment
  - metrics
  - statistics
  - insert
  - quality
  - bam
# Software wrapped by this module.
tools:
  - picard:
      description: |
        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
        data and formats such as SAM/BAM/CRAM and VCF.
      homepage: https://broadinstitute.github.io/picard/
      documentation: https://broadinstitute.github.io/picard/
# Pipeline-level parameters read by the module.
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
# Inputs accepted by the module.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM file
      pattern: "*.{bam}"
  - fasta:
      type: file
      description: Genome fasta file
  - options:
      type: map
      description: |
        Groovy Map containing module options for passing command-line arguments and
        output file paths.
# Outputs emitted by the module.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - metrics:
      type: file
      description: Alignment metrics files generated by picard
      pattern: "*_{metrics}"
  - pdf:
      type: file
      description: PDF plots of metrics
      pattern: "*.{pdf}"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@drpatelh"

View file

@ -0,0 +1 @@
../../../../../tests/data/fasta/E_coli/NC_010473.fa

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

View file

@ -0,0 +1,22 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { PICARD_COLLECTMULTIPLEMETRICS } from '../main.nf'

// Runs PICARD_COLLECTMULTIPLEMETRICS on the bundled paired-end BAM and
// reference fasta, with an empty options map.
workflow test {
    // Tuple of [ meta map, BAM file ]; file() fails early if the path is missing.
    def input = [
        [ id:'test', single_end:false ],
        file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true)
    ]
    def fasta = file("${baseDir}/input/NC_010473.fa", checkIfExists: true)

    PICARD_COLLECTMULTIPLEMETRICS ( input, fasta, [:] )
}

// Default entry point: delegate to the named test workflow.
workflow {
    test()
}

View file

@ -0,0 +1,20 @@
// Default parameters for the module test run.
params {
    outdir           = "output/"
    publish_dir_mode = "copy"
    conda            = false
}

// Execution profiles, selected with `-profile <name>`.
profiles {
    conda {
        params.conda = true
    }
    docker {
        docker.enabled    = true
        // Pass the invoking user's uid:gid to `docker run`.
        docker.runOptions = '-u \$(id -u):\$(id -g)'
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
}

View file

@ -0,0 +1,12 @@
## htsjdk.samtools.metrics.StringHeader
# CollectMultipleMetrics INPUT=test.paired_end.sorted.bam OUTPUT=test.CollectMultipleMetrics REFERENCE_SEQUENCE=NC_010473.fa ASSUME_SORTED=true STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false
## htsjdk.samtools.metrics.StringHeader
# Started on: Fri Aug 07 15:24:11 GMT 2020
## METRICS CLASS picard.analysis.AlignmentSummaryMetrics
CATEGORY TOTAL_READS PF_READS PCT_PF_READS PF_NOISE_READS PF_READS_ALIGNED PCT_PF_READS_ALIGNED PF_ALIGNED_BASES PF_HQ_ALIGNED_READS PF_HQ_ALIGNED_BASES PF_HQ_ALIGNED_Q20_BASES PF_HQ_MEDIAN_MISMATCHES PF_MISMATCH_RATE PF_HQ_ERROR_RATE PF_INDEL_RATE MEAN_READ_LENGTH READS_ALIGNED_IN_PAIRS PCT_READS_ALIGNED_IN_PAIRS PF_READS_IMPROPER_PAIRS PCT_PF_READS_IMPROPER_PAIRS BAD_CYCLES STRAND_BALANCE PCT_CHIMERAS PCT_ADAPTER SAMPLE LIBRARY READ_GROUP
FIRST_OF_PAIR 10000 10000 1 0 10000 1 1000000 9367 936700 936700 0 0 0 0 100 10000 1 0 0 0 0.5078 0 0
SECOND_OF_PAIR 10000 10000 1 0 10000 1 1000000 9366 936600 936600 0 0.000001 0.000001 0 100 10000 1 0 0 0 0.4922 0 0
PAIR 20000 20000 1 0 20000 1 2000000 18733 1873300 1873300 0 0.000001 0.000001 0 100 20000 1 0 0 0 0.5 0 0

View file

@ -0,0 +1,209 @@
## htsjdk.samtools.metrics.StringHeader
# CollectMultipleMetrics INPUT=test.paired_end.sorted.bam OUTPUT=test.CollectMultipleMetrics REFERENCE_SEQUENCE=NC_010473.fa ASSUME_SORTED=true STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false
## htsjdk.samtools.metrics.StringHeader
# Started on: Fri Aug 07 15:24:11 GMT 2020
## METRICS CLASS picard.analysis.BaseDistributionByCycleMetrics
READ_END CYCLE PCT_A PCT_C PCT_G PCT_T PCT_N
1 1 24.74 25.02 25.61 24.63 0
1 2 25.5 23.74 25.96 24.8 0
1 3 24.91 25.4 24.96 24.73 0
1 4 25.44 25.36 24.59 24.61 0
1 5 25.33 25.28 25.04 24.35 0
1 6 24.94 25.83 25.25 23.98 0
1 7 24.26 25.58 26.45 23.71 0
1 8 23.87 26.45 25.17 24.51 0
1 9 24.03 26.08 25.7 24.19 0
1 10 24.8 24.8 25.34 25.06 0
1 11 25.03 25.07 24.86 25.04 0
1 12 24.37 25.97 25.25 24.41 0
1 13 25.16 24.85 25.59 24.4 0
1 14 24.27 25.69 25.79 24.25 0
1 15 24.69 25.93 25.62 23.76 0
1 16 24.86 25.94 24.97 24.23 0
1 17 24.38 25.68 25.18 24.76 0
1 18 24.03 25.63 25.9 24.44 0
1 19 24.48 24.89 25.72 24.91 0
1 20 24.06 25.07 25.75 25.12 0
1 21 24.76 25.39 24.4 25.45 0
1 22 25.18 25.02 24.66 25.14 0
1 23 24.54 25.44 25.83 24.19 0
1 24 23.7 26.35 25.59 24.36 0
1 25 23.81 24.86 25.83 25.5 0
1 26 24.57 25.78 25.11 24.54 0
1 27 24 25.53 25.8 24.67 0
1 28 24.32 25.76 25.36 24.56 0
1 29 24.57 25.03 26.21 24.19 0
1 30 25.44 25.05 24.7 24.81 0
1 31 24.16 25.28 25.86 24.7 0
1 32 24.45 25.24 25.21 25.1 0
1 33 24.47 26.17 24.98 24.38 0
1 34 24.95 24.47 26.38 24.2 0
1 35 24.46 26.59 24.82 24.13 0
1 36 24.35 25.45 25.7 24.5 0
1 37 24.54 25.05 25.9 24.51 0
1 38 24.82 26.59 24.4 24.19 0
1 39 24.37 25.48 25.59 24.56 0
1 40 24.93 25.4 25.17 24.5 0
1 41 24.6 25.51 24.78 25.11 0
1 42 24.33 26.16 25.04 24.47 0
1 43 25.01 25.57 25.51 23.91 0
1 44 24.57 25.52 25.48 24.43 0
1 45 24.62 25.32 25.59 24.47 0
1 46 24.53 25.4 25.15 24.92 0
1 47 24.59 25.66 24.94 24.81 0
1 48 23.81 25.76 25.33 25.1 0
1 49 25.35 25.7 23.8 25.15 0
1 50 24.53 25.21 25.47 24.79 0
1 51 24.03 25.69 25.92 24.36 0
1 52 23.98 25.01 25.99 25.02 0
1 53 24.46 25.5 25.59 24.45 0
1 54 24.41 25.73 25.45 24.41 0
1 55 25.18 25.19 25.45 24.18 0
1 56 24.18 25.49 25.72 24.61 0
1 57 24.08 25.49 25.41 25.02 0
1 58 24.1 25.35 25.38 25.17 0
1 59 24.58 25.58 25.39 24.45 0
1 60 24.67 25.22 25.47 24.64 0
1 61 24.09 25.56 25.36 24.99 0
1 62 25.2 25.47 25.68 23.65 0
1 63 24.47 25.4 25.28 24.85 0
1 64 24.8 25.45 25.4 24.35 0
1 65 25.08 26.2 24.91 23.81 0
1 66 24.89 24.74 25.37 25 0
1 67 25.1 25.13 25.01 24.76 0
1 68 24.73 25.3 25.13 24.84 0
1 69 24.94 25.18 25.2 24.68 0
1 70 24.29 25.21 25.62 24.88 0
1 71 24.21 26.3 25.72 23.77 0
1 72 24.39 25.61 25.34 24.66 0
1 73 25.29 24.55 25.67 24.49 0
1 74 25.66 25.07 24.54 24.73 0
1 75 24.26 26.19 24.81 24.74 0
1 76 24.04 24.89 26.43 24.64 0
1 77 25.01 25.04 25.04 24.91 0
1 78 24.37 26.53 25.05 24.05 0
1 79 24.6 25.35 25.69 24.36 0
1 80 24.9 25.32 25.33 24.45 0
1 81 25.1 26.14 24.64 24.12 0
1 82 23.95 25.9 25.23 24.92 0
1 83 24.73 25.25 25.41 24.61 0
1 84 24.81 25.43 25.68 24.08 0
1 85 24.73 24.72 25.57 24.98 0
1 86 24.12 25.26 25.17 25.45 0
1 87 24.68 25.94 25.17 24.21 0
1 88 24.77 25.11 25.88 24.24 0
1 89 24.09 25.62 25.34 24.95 0
1 90 24.35 25.57 24.76 25.32 0
1 91 25.17 25.07 25.31 24.45 0
1 92 25.65 25.04 25 24.31 0
1 93 25.04 25.51 25.04 24.41 0
1 94 24.85 26.17 25.11 23.87 0
1 95 24.81 25.37 25.5 24.32 0
1 96 24.84 25.25 25.47 24.44 0
1 97 24.61 25.47 25.27 24.65 0
1 98 24.86 25 25.48 24.66 0
1 99 23.74 25.3 25.96 25 0
1 100 23.83 26.06 25.13 24.98 0
2 101 24.05 25.38 26.27 24.3 0
2 102 23.62 25.98 25.49 24.91 0
2 103 23.93 25.62 26.32 24.13 0
2 104 24.94 25.34 25.24 24.48 0
2 105 24.69 25.34 25.35 24.62 0
2 106 24.69 24.97 25.08 25.26 0
2 107 24.1 25.17 26.68 24.05 0
2 108 24.92 25.49 24.96 24.63 0
2 109 24.23 25.94 24.81 25.02 0
2 110 24.38 25.12 25.59 24.91 0
2 111 24.42 25.6 25.75 24.23 0
2 112 25.33 25.73 24.96 23.98 0
2 113 25.35 24.51 26.17 23.97 0
2 114 24.12 25.34 26.17 24.37 0
2 115 24.77 25.23 25.13 24.87 0
2 116 25.36 25.27 25.1 24.27 0
2 117 24.88 24.93 25.68 24.51 0
2 118 23.79 25.75 25.19 25.27 0
2 119 24.43 25.82 24.93 24.82 0
2 120 24.4 25.93 24.62 25.05 0
2 121 25.88 24.75 25.12 24.25 0
2 122 24.27 25.8 24.92 25.01 0
2 123 24.61 25.95 25.28 24.16 0
2 124 24.66 25.46 25.69 24.19 0
2 125 24.09 25.61 25.73 24.57 0
2 126 25.11 24.62 25.34 24.93 0
2 127 24.51 26.23 24.74 24.52 0
2 128 25.39 25.58 24.73 24.3 0
2 129 24.85 25.62 24.9 24.63 0
2 130 24.43 25.89 25.15 24.53 0
2 131 24.7 25.53 25.58 24.19 0
2 132 24.5 26.2 25.01 24.29 0
2 133 23.88 25.74 25.43 24.95 0
2 134 24.9 25.2 25.18 24.72 0
2 135 24.16 25.63 25.75 24.46 0
2 136 23.8 25.87 25.42 24.91 0
2 137 24.96 25.03 26.27 23.74 0
2 138 24.53 25.69 24.99 24.79 0
2 139 23.96 25.89 24.7 25.45 0
2 140 24.13 24.91 25.68 25.28 0
2 141 24.32 25.44 25.03 25.21 0
2 142 24.23 25.86 24.69 25.22 0
2 143 24.44 25.74 24.6 25.22 0
2 144 25.39 24.13 25.44 25.04 0
2 145 24.68 25.9 25.28 24.14 0
2 146 23.85 25.25 25.61 25.29 0
2 147 25.2 23.57 26.09 25.14 0
2 148 25.49 25.2 25.07 24.24 0
2 149 25.12 25.47 25.15 24.26 0
2 150 24.73 25.26 25.44 24.57 0
2 151 25.43 25.54 24.96 24.07 0
2 152 25.03 25.6 24.97 24.4 0
2 153 25.63 25.45 24.67 24.25 0
2 154 24.32 25.73 25.33 24.62 0
2 155 23.73 25.22 25.57 25.48 0
2 156 23.82 25.86 26.08 24.24 0
2 157 24.82 24.65 25.22 25.31 0
2 158 24.96 25.77 24.82 24.45 0
2 159 24.16 26.01 25.42 24.41 0
2 160 24.57 25.49 25.27 24.67 0
2 161 24.56 25.46 25.66 24.32 0
2 162 24.85 24.93 25.01 25.21 0
2 163 24.54 24.82 25.87 24.77 0
2 164 24.88 25.37 25.67 24.08 0
2 165 24.49 25.02 26.39 24.1 0
2 166 24.03 25.21 25.87 24.89 0
2 167 24.37 24.94 25.79 24.9 0
2 168 24.04 25.06 25.86 25.04 0
2 169 24.44 25.11 25.49 24.95 0.01
2 170 24.47 25.75 25.4 24.38 0
2 171 24.31 25.94 25.55 24.2 0
2 172 25.19 25.19 25.29 24.33 0
2 173 24.78 25.82 25.31 24.09 0
2 174 24.65 25.8 25.01 24.54 0
2 175 24.7 25.09 25.57 24.64 0
2 176 24.75 25 24.93 25.32 0
2 177 24.61 25.39 25.07 24.93 0
2 178 24.64 25.38 25.92 24.06 0
2 179 25.21 25.97 24.86 23.96 0
2 180 23.95 26.44 25.54 24.07 0
2 181 25.06 25.25 26.07 23.62 0
2 182 23.91 26.04 25.25 24.8 0
2 183 25.01 25.52 25.44 24.03 0
2 184 25.19 25.68 24.67 24.46 0
2 185 24.81 25.27 24.82 25.1 0
2 186 24.31 25.78 24.9 25.01 0
2 187 24.94 24.98 25.55 24.53 0
2 188 25.25 25.77 25.32 23.66 0
2 189 25.07 25.91 24.94 24.08 0
2 190 24.64 25.42 26.18 23.76 0
2 191 24.92 25.38 25.24 24.46 0
2 192 24.47 25.18 25.08 25.27 0
2 193 25.02 25.58 24.58 24.82 0
2 194 24.05 25.41 24.94 25.6 0
2 195 24.91 25.05 25.08 24.96 0
2 196 24.1 25.22 25.74 24.94 0
2 197 24.56 24.56 25.68 25.2 0
2 198 25.34 25.16 25.36 24.14 0
2 199 24.8 25.47 25.7 24.03 0
2 200 23.53 26.05 26.1 24.32 0

View file

@ -0,0 +1,343 @@
## htsjdk.samtools.metrics.StringHeader
# CollectMultipleMetrics INPUT=test.paired_end.sorted.bam OUTPUT=test.CollectMultipleMetrics REFERENCE_SEQUENCE=NC_010473.fa ASSUME_SORTED=true STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false
## htsjdk.samtools.metrics.StringHeader
# Started on: Fri Aug 07 15:24:11 GMT 2020
## METRICS CLASS picard.analysis.InsertSizeMetrics
MEDIAN_INSERT_SIZE MODE_INSERT_SIZE MEDIAN_ABSOLUTE_DEVIATION MIN_INSERT_SIZE MAX_INSERT_SIZE MEAN_INSERT_SIZE STANDARD_DEVIATION READ_PAIRS PAIR_ORIENTATION WIDTH_OF_10_PERCENT WIDTH_OF_20_PERCENT WIDTH_OF_30_PERCENT WIDTH_OF_40_PERCENT WIDTH_OF_50_PERCENT WIDTH_OF_60_PERCENT WIDTH_OF_70_PERCENT WIDTH_OF_80_PERCENT WIDTH_OF_90_PERCENT WIDTH_OF_95_PERCENT WIDTH_OF_99_PERCENT SAMPLE LIBRARY READ_GROUP
236 117 82 69 404 234.8136 94.885497 10000 FR 35 67 99 133 165 199 231 263 295 313 329
## HISTOGRAM java.lang.Integer
insert_size All_Reads.fr_count
69 36
70 11
71 30
72 24
73 34
74 25
75 30
76 22
77 26
78 24
79 30
80 25
81 33
82 33
83 27
84 26
85 36
86 26
87 26
88 34
89 24
90 33
91 35
92 40
93 37
94 20
95 30
96 26
97 33
98 29
99 30
100 33
101 27
102 29
103 37
104 32
105 26
106 38
107 26
108 32
109 26
110 29
111 43
112 26
113 20
114 34
115 32
116 32
117 52
118 30
119 29
120 28
121 35
122 25
123 34
124 22
125 20
126 38
127 30
128 32
129 31
130 33
131 32
132 22
133 35
134 30
135 28
136 33
137 28
138 29
139 31
140 34
141 37
142 30
143 30
144 27
145 24
146 26
147 33
148 33
149 28
150 25
151 25
152 24
153 24
154 33
155 36
156 21
157 26
158 26
159 27
160 27
161 34
162 28
163 27
164 28
165 25
166 34
167 24
168 41
169 26
170 34
171 28
172 30
173 28
174 26
175 29
176 37
177 35
178 28
179 25
180 25
181 36
182 31
183 25
184 29
185 32
186 24
187 27
188 31
189 34
190 45
191 32
192 20
193 41
194 30
195 31
196 24
197 36
198 27
199 30
200 30
201 26
202 40
203 37
204 37
205 28
206 33
207 28
208 31
209 37
210 28
211 31
212 32
213 33
214 33
215 29
216 28
217 33
218 27
219 22
220 31
221 38
222 29
223 41
224 22
225 28
226 29
227 26
228 26
229 33
230 21
231 24
232 31
233 33
234 23
235 29
236 30
237 31
238 33
239 31
240 25
241 29
242 40
243 24
244 29
245 39
246 32
247 40
248 30
249 29
250 32
251 22
252 33
253 33
254 27
255 28
256 19
257 39
258 27
259 34
260 30
261 36
262 34
263 32
264 34
265 19
266 45
267 29
268 38
269 28
270 27
271 26
272 35
273 31
274 31
275 25
276 23
277 35
278 31
279 35
280 27
281 35
282 28
283 30
284 19
285 30
286 31
287 28
288 28
289 42
290 34
291 32
292 36
293 33
294 34
295 29
296 26
297 19
298 37
299 24
300 29
301 25
302 26
303 43
304 31
305 30
306 28
307 29
308 35
309 35
310 29
311 35
312 30
313 48
314 17
315 26
316 25
317 28
318 31
319 33
320 36
321 30
322 34
323 30
324 38
325 34
326 23
327 28
328 37
329 26
330 30
331 37
332 33
333 28
334 28
335 37
336 42
337 30
338 33
339 32
340 37
341 26
342 37
343 29
344 23
345 35
346 25
347 33
348 34
349 37
350 30
351 23
352 30
353 30
354 35
355 30
356 28
357 26
358 31
359 28
360 27
361 32
362 30
363 26
364 29
365 40
366 33
367 38
368 27
369 34
370 43
371 27
372 35
373 27
374 24
375 30
376 36
377 24
378 30
379 35
380 26
381 32
382 34
383 34
384 34
385 21
386 34
387 35
388 29
389 25
390 24
391 25
392 29
393 32
394 33
395 27
396 22
397 36
398 28
404 1

View file

@ -0,0 +1,209 @@
## htsjdk.samtools.metrics.StringHeader
# CollectMultipleMetrics INPUT=test.paired_end.sorted.bam OUTPUT=test.CollectMultipleMetrics REFERENCE_SEQUENCE=NC_010473.fa ASSUME_SORTED=true STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false
## htsjdk.samtools.metrics.StringHeader
# Started on: Fri Aug 07 15:24:11 GMT 2020
## HISTOGRAM java.lang.Integer
CYCLE MEAN_QUALITY
1 40
2 40
3 40
4 40
5 40
6 40
7 40
8 40
9 40
10 40
11 40
12 40
13 40
14 40
15 40
16 40
17 40
18 40
19 40
20 40
21 40
22 40
23 40
24 40
25 40
26 40
27 40
28 40
29 40
30 40
31 40
32 40
33 40
34 40
35 40
36 40
37 40
38 40
39 40
40 40
41 40
42 40
43 40
44 40
45 40
46 40
47 40
48 40
49 40
50 40
51 40
52 40
53 40
54 40
55 40
56 40
57 40
58 40
59 40
60 40
61 40
62 40
63 40
64 40
65 40
66 40
67 40
68 40
69 40
70 40
71 40
72 40
73 40
74 40
75 40
76 40
77 40
78 40
79 40
80 40
81 40
82 40
83 40
84 40
85 40
86 40
87 40
88 40
89 40
90 40
91 40
92 40
93 40
94 40
95 40
96 40
97 40
98 40
99 40
100 40
101 40
102 40
103 40
104 40
105 40
106 40
107 40
108 40
109 40
110 40
111 40
112 40
113 40
114 40
115 40
116 40
117 40
118 40
119 40
120 40
121 40
122 40
123 40
124 40
125 40
126 40
127 40
128 40
129 40
130 40
131 40
132 40
133 40
134 40
135 40
136 40
137 40
138 40
139 40
140 40
141 40
142 40
143 40
144 40
145 40
146 40
147 40
148 40
149 40
150 40
151 40
152 40
153 40
154 40
155 40
156 40
157 40
158 40
159 40
160 40
161 40
162 40
163 40
164 40
165 40
166 40
167 40
168 40
169 40
170 40
171 40
172 40
173 40
174 40
175 40
176 40
177 40
178 40
179 40
180 40
181 40
182 40
183 40
184 40
185 40
186 40
187 40
188 40
189 40
190 40
191 40
192 40
193 40
194 40
195 40
196 40
197 40
198 40
199 40
200 40

View file

@ -0,0 +1,10 @@
## htsjdk.samtools.metrics.StringHeader
# CollectMultipleMetrics INPUT=test.paired_end.sorted.bam OUTPUT=test.CollectMultipleMetrics REFERENCE_SEQUENCE=NC_010473.fa ASSUME_SORTED=true STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false
## htsjdk.samtools.metrics.StringHeader
# Started on: Fri Aug 07 15:24:11 GMT 2020
## HISTOGRAM java.lang.Byte
QUALITY COUNT_OF_Q
40 1999999

View file

@ -0,0 +1,67 @@
# Module metadata for picard_markduplicates: describes the wrapped tool and
# the module's parameters, input channels and output channels.
name: picard_markduplicates
description: Locate and tag duplicate reads in a BAM file
keywords:
  - markduplicates
  - pcr
  - duplicates
  - bam
  - sam
  - cram
# Software wrapped by this module.
tools:
  - picard:
      description: |
        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
        data and formats such as SAM/BAM/CRAM and VCF.
      homepage: https://broadinstitute.github.io/picard/
      documentation: https://broadinstitute.github.io/picard/
# Pipeline-level parameters read by the module.
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
# Inputs accepted by the module.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM file
      pattern: "*.{bam}"
  - options:
      type: map
      description: |
        Groovy Map containing module options for passing command-line arguments and
        output file paths.
# Outputs emitted by the module.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM file with duplicate reads marked/removed
      pattern: "*.{bam}"
  - metrics:
      type: file
      description: Duplicate metrics file generated by picard
      pattern: "*.{metrics.txt}"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@drpatelh"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { PICARD_MARKDUPLICATES } from '../main.nf'

// Runs PICARD_MARKDUPLICATES on the bundled paired-end BAM with an empty
// options map.
workflow test {
    // Tuple of [ meta map, BAM file ]; file() fails early if the path is missing.
    def input = [
        [ id:'test', single_end:false ],
        file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true)
    ]

    PICARD_MARKDUPLICATES ( input, [:] )
}

// Default entry point: delegate to the named test workflow.
workflow {
    test()
}

View file

@ -0,0 +1,20 @@
// Default parameters for the module test run.
params {
    outdir           = "output/"
    publish_dir_mode = "copy"
    conda            = false
}

// Execution profiles, selected with `-profile <name>`.
profiles {
    conda {
        params.conda = true
    }
    docker {
        docker.enabled    = true
        // Pass the invoking user's uid:gid to `docker run`.
        docker.runOptions = '-u \$(id -u):\$(id -g)'
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
}

View file

@ -0,0 +1,13 @@
## htsjdk.samtools.metrics.StringHeader
# MarkDuplicates INPUT=[test.paired_end.sorted.bam] OUTPUT=test.bam METRICS_FILE=test.MarkDuplicates.metrics.txt MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 SORTING_COLLECTION_SIZE_RATIO=0.25 TAG_DUPLICATE_SET_MEMBERS=false REMOVE_SEQUENCING_DUPLICATES=false TAGGING_POLICY=DontTag CLEAR_DT=true DUPLEX_UMI=false ADD_PG_TAG_TO_READS=true REMOVE_DUPLICATES=false ASSUME_SORTED=false DUPLICATE_SCORING_STRATEGY=SUM_OF_BASE_QUALITIES PROGRAM_RECORD_ID=MarkDuplicates PROGRAM_GROUP_NAME=MarkDuplicates READ_NAME_REGEX=<optimized capture of last three ':' separated fields as numeric values> OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 MAX_OPTICAL_DUPLICATE_SET_SIZE=300000 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false
## htsjdk.samtools.metrics.StringHeader
# Started on: Fri Aug 07 15:11:32 GMT 2020
## METRICS CLASS picard.sam.DuplicationMetrics
LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATES PERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE
Unknown Library 0 10000 0 0 0 0 0 0
## HISTOGRAM java.lang.Double
set_size all_sets non_optical_sets
1.0 10000 10000

View file

@ -0,0 +1,61 @@
# Module metadata for picard_mergesamfiles: describes the wrapped tool and
# the module's parameters, input channels and output channels.
name: picard_mergesamfiles
description: Merges multiple BAM files into a single file
keywords:
  - merge
  - alignment
  - bam
  - sam
# Software wrapped by this module.
tools:
  - picard:
      description: |
        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
        data and formats such as SAM/BAM/CRAM and VCF.
      homepage: https://broadinstitute.github.io/picard/
      documentation: https://broadinstitute.github.io/picard/
# Pipeline-level parameters read by the module.
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
# Inputs accepted by the module.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: List of BAM files
      pattern: "*.{bam}"
  - options:
      type: map
      description: |
        Groovy Map containing module options for passing command-line arguments and
        output file paths.
# Outputs emitted by the module.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: Merged BAM file
      pattern: "*.{bam}"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@drpatelh"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/test.paired_end.sorted.bam

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { PICARD_MERGESAMFILES } from '../main.nf'

// Runs PICARD_MERGESAMFILES on two bundled BAM files with an empty options map.
workflow test {
    // BAM files to merge; file() fails early if either path is missing.
    def bams = [
        file("${baseDir}/input/test.paired_end.sorted.bam", checkIfExists: true),
        file("${baseDir}/input/test.paired_end.COPY.sorted.bam", checkIfExists: true)
    ]
    // Tuple of [ meta map, list of BAM files ].
    def input = [ [ id:'test', single_end:false ], bams ]

    PICARD_MERGESAMFILES ( input, [:] )
}

// Default entry point: delegate to the named test workflow.
workflow {
    test()
}

View file

@ -0,0 +1,20 @@
// Default parameters for the module test run.
params {
    outdir           = "output/"
    publish_dir_mode = "copy"
    conda            = false
}

// Execution profiles, selected with `-profile <name>`.
profiles {
    conda {
        params.conda = true
    }
    docker {
        docker.enabled    = true
        // Pass the invoking user's uid:gid to `docker run`.
        docker.runOptions = '-u \$(id -u):\$(id -g)'
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
}