mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Merge branch 'master' of https://github.com/nf-core/modules into split_tests
This commit is contained in:
commit
38c3ecfac6
174 changed files with 65 additions and 7589 deletions
7
.github/workflows/pytest-workflow.yml
vendored
7
.github/workflows/pytest-workflow.yml
vendored
|
@ -25,7 +25,7 @@ jobs:
|
|||
matrix:
|
||||
nxf_version: ['20.11.0-edge']
|
||||
tags: ['${{ fromJson(needs.changes.outputs.modules) }}']
|
||||
profile: ['docker', 'singularity', 'conda']
|
||||
profile: ['docker'] ## 'conda', 'singularity'
|
||||
env:
|
||||
NXF_ANSI_LOG: false
|
||||
steps:
|
||||
|
@ -72,8 +72,11 @@ jobs:
|
|||
uses: conda-incubator/setup-miniconda@v2
|
||||
with:
|
||||
auto-update-conda: true
|
||||
channels: defaults, bioconda, conda-forge
|
||||
channels: conda-forge,bioconda,defaults
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Conda clean
|
||||
if: matrix.profile == 'conda'
|
||||
run: conda clean -a
|
||||
|
||||
# Test the module
|
||||
- name: Run pytest-workflow
|
||||
|
|
|
@ -1,7 +0,0 @@
|
|||
FROM nfcore/base
|
||||
LABEL authors="Jose Espinosa-Carrasco" \
|
||||
description="Docker image containing all requirements for nf-core/modules/bedtools/complement"
|
||||
|
||||
COPY environment.yml /
|
||||
RUN conda env create -f /environment.yml && conda clean -a
|
||||
ENV PATH /opt/conda/envs/nf-core-bedtools-complement/bin:$PATH
|
|
@ -1,9 +0,0 @@
|
|||
# You can use this file to create a conda environment for this pipeline:
|
||||
# conda env create -f environment.yml
|
||||
name: nf-core-bedtools-complement
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- bioconda::bedtools=2.29.2
|
|
@ -1,32 +0,0 @@
|
|||
def MODULE = "bedtools_complement"
|
||||
params.publish_dir = MODULE
|
||||
params.publish_results = "default"
|
||||
|
||||
process BEDTOOLS_COMPLEMENT {
|
||||
tag {input_file}
|
||||
|
||||
publishDir "${params.out_dir}/${params.publish_dir}",
|
||||
mode: params.publish_dir_mode,
|
||||
saveAs: { filename ->
|
||||
if (params.publish_results == "none") null
|
||||
else filename }
|
||||
|
||||
container "docker.pkg.github.com/nf-core/$MODULE"
|
||||
|
||||
conda "${moduleDir}/environment.yml"
|
||||
|
||||
input:
|
||||
path (input_file)
|
||||
path (fasta_sizes)
|
||||
val (bedtools_complement_args)
|
||||
|
||||
output:
|
||||
path "${input_file}.bed", emit: complement
|
||||
path "*.version.txt", emit: version
|
||||
|
||||
script:
|
||||
"""
|
||||
bedtools complement -i ${input_file} -g ${fasta_sizes} ${bedtools_complement_args} > ${input_file}.bed
|
||||
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
|
||||
"""
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
name: bedtools complement
|
||||
description: Returns all intervals in a genome that are not covered by at least one interval in the input BED/GFF/VCF file
|
||||
keywords:
|
||||
- complement
|
||||
tools:
|
||||
- bedtools:
|
||||
description: |
|
||||
Bedtools is a software package that provides a toolset for performing genome arithmetic operations.
|
||||
homepage: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
documentation: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
doi: 10.1093/bioinformatics/btq033
|
||||
input:
|
||||
-
|
||||
- input_file:
|
||||
type: file
|
||||
description: Input genomic coordinates file
|
||||
pattern: "*.{bed,gff,vcf}"
|
||||
- fasta_sizes:
|
||||
type: file
|
||||
description: Genome chromosome sizes
|
||||
pattern: "*.{txt,sizes}"
|
||||
|
||||
output:
|
||||
-
|
||||
- index:
|
||||
type: stdout,file
|
||||
description:
|
||||
pattern: "stdout,*.{bed,gff,vcf}"
|
||||
authors:
|
||||
- "@JoseEspinosa"
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bed/A.bed
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bed/genome.sizes
|
|
@ -1,19 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.preview.dsl = 2
|
||||
|
||||
params.out_dir = "test_output"
|
||||
params.fastqc_args = ''
|
||||
params.publish_dir_mode = "copy"
|
||||
params.bedtools_complement_args = ''
|
||||
|
||||
include BEDTOOLS_COMPLEMENT from '../main.nf' params(params)
|
||||
|
||||
// Define input channels
|
||||
ch_input = Channel.fromPath('./input_data/A.bed')
|
||||
chrom_sizes = Channel.fromPath('./input_data/genome.sizes')
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
BEDTOOLS_COMPLEMENT(ch_input, chrom_sizes, params.bedtools_complement_args)
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
FROM nfcore/base
|
||||
LABEL authors="Jose Espinosa-Carrasco" \
|
||||
description="Docker image containing all requirements for nf-core/modules/bedtools/genomecov"
|
||||
|
||||
COPY environment.yml /
|
||||
RUN conda env create -f /environment.yml && conda clean -a
|
||||
ENV PATH /opt/conda/envs/nf-core-bedtools-genomecov/bin:$PATH
|
|
@ -1,9 +0,0 @@
|
|||
# You can use this file to create a conda environment for this pipeline:
|
||||
# conda env create -f environment.yml
|
||||
name: nf-core-bedtools-genomecov
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- bioconda::bedtools=2.29.2
|
|
@ -1,32 +0,0 @@
|
|||
def MODULE = "bedtools_genomecov"
|
||||
params.publish_dir = MODULE
|
||||
params.publish_results = "default"
|
||||
|
||||
process BEDTOOLS_GENOMECOV {
|
||||
tag {bam}
|
||||
|
||||
publishDir "${params.out_dir}/${params.publish_dir}",
|
||||
mode: params.publish_dir_mode,
|
||||
saveAs: { filename ->
|
||||
if (params.publish_results == "none") null
|
||||
else filename }
|
||||
|
||||
container "docker.pkg.github.com/nf-core/$MODULE"
|
||||
|
||||
conda "${moduleDir}/environment.yml"
|
||||
|
||||
input:
|
||||
path (bam)
|
||||
path (chrom_sizes)
|
||||
val (bedtools_genomecov_args)
|
||||
|
||||
output:
|
||||
path "${bam}.bed", emit: coverage
|
||||
path "*.version.txt", emit: version
|
||||
|
||||
script:
|
||||
"""
|
||||
bedtools genomecov -ibam ${bam} -g ${chrom_sizes} ${bedtools_genomecov_args} > ${bam}.bed
|
||||
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
|
||||
"""
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
name: bedtools genomecov
|
||||
description: Returns feature coverage for a given genome in different formats
|
||||
keywords:
|
||||
- genomecov
|
||||
tools:
|
||||
- bedtools:
|
||||
description: |
|
||||
Bedtools is a software package that provides a toolset for performing genome arithmetic operations.
|
||||
homepage: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
documentation: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
doi: 10.1093/bioinformatics/btq033
|
||||
input:
|
||||
-
|
||||
- input_file:
|
||||
type: file
|
||||
description: Input genomic coordinates file
|
||||
pattern: "*.{bam}"
|
||||
- chrom_sizes:
|
||||
type: file
|
||||
description: Genome chromosome sizes
|
||||
pattern: "*.{txt,sizes}"
|
||||
|
||||
output:
|
||||
-
|
||||
- index:
|
||||
type: stdout,file
|
||||
description:
|
||||
pattern: "stdout,*.{bed,bedGraph}"
|
||||
authors:
|
||||
- "@JoseEspinosa"
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bam/JK2067_downsampled_s0.1.bam
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bed/genome.sizes
|
|
@ -1,19 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.preview.dsl = 2
|
||||
|
||||
params.out_dir = "test_output"
|
||||
params.fastqc_args = ''
|
||||
params.publish_dir_mode = "copy"
|
||||
params.bedtools_genomecov_args = '' //'-bg'
|
||||
|
||||
include BEDTOOLS_GENOMECOV from '../main.nf' params(params)
|
||||
|
||||
// Define input channels
|
||||
ch_input = Channel.fromPath('./input_data/JK2067_downsampled_s0.1.bam')
|
||||
chrom_sizes = Channel.fromPath('./input_data/genome.sizes')
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
BEDTOOLS_GENOMECOV(ch_input, chrom_sizes, params.bedtools_genomecov_args)
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
FROM nfcore/base
|
||||
LABEL authors="Jose Espinosa-Carrasco" \
|
||||
description="Docker image containing all requirements for nf-core/modules/bedtools/intersect"
|
||||
|
||||
COPY environment.yml /
|
||||
RUN conda env create -f /environment.yml && conda clean -a
|
||||
ENV PATH /opt/conda/envs/nf-core-bedtools-intersectbed/bin:$PATH
|
|
@ -1,9 +0,0 @@
|
|||
# You can use this file to create a conda environment for this pipeline:
|
||||
# conda env create -f environment.yml
|
||||
name: nf-core-bedtools-intersect
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- bioconda::bedtools=2.29.2
|
|
@ -1,36 +0,0 @@
|
|||
def MODULE = "bedtools_intersect"
|
||||
params.publish_dir = MODULE
|
||||
params.publish_results = "default"
|
||||
|
||||
process INTERSECT_BED {
|
||||
tag "$input_file_1-$input_file_2"
|
||||
|
||||
publishDir "${params.out_dir}/${params.publish_dir}",
|
||||
mode: params.publish_dir_mode,
|
||||
saveAs: { filename ->
|
||||
if (params.publish_results == "none") null
|
||||
else filename }
|
||||
|
||||
container "docker.pkg.github.com/nf-core/$MODULE"
|
||||
|
||||
conda "${moduleDir}/environment.yml"
|
||||
|
||||
input:
|
||||
path (input_file_1)
|
||||
path (input_file_2)
|
||||
val (intersectbed_args)
|
||||
|
||||
output:
|
||||
path "${input_file_1.baseName}_i_${input_file_2.baseName}.bed", emit: intersect
|
||||
path "*.version.txt", emit: version
|
||||
|
||||
script:
|
||||
def params_string = intersectbed_args.collect {
|
||||
/-$it.key $it.value/
|
||||
} join " "
|
||||
|
||||
"""
|
||||
bedtools intersect -a ${input_file_1} -b ${input_file_2} ${params_string} > ${input_file_1.baseName}_i_${input_file_2.baseName}.bed
|
||||
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
|
||||
"""
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
name: bedtools intersect
|
||||
description: Returns the overlapping features between two sets of genomics features
|
||||
keywords:
|
||||
- bedtools intersect
|
||||
tools:
|
||||
- bedtools:
|
||||
description: |
|
||||
Bedtools is a software package that provides a toolset for performing genome arithmetic operations.
|
||||
homepage: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
documentation: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
doi: 10.1093/bioinformatics/btq033
|
||||
input:
|
||||
-
|
||||
- input_file_1:
|
||||
type: file
|
||||
description: Input genomic coordinates file
|
||||
pattern: "*.{bam,bed,BED,gff,vcf}"
|
||||
- input_file_2:
|
||||
type: file
|
||||
description: Input genomic coordinates file
|
||||
pattern: "*.{bam,bed,BED,gff,vcf}"
|
||||
|
||||
output:
|
||||
-
|
||||
- index:
|
||||
type: stdout,file
|
||||
description:
|
||||
pattern: "stdout,*.{bed,BED}"
|
||||
authors:
|
||||
- "@JoseEspinosa"
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bed/A.bed
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bed/B.bed
|
|
@ -1,24 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.preview.dsl = 2
|
||||
params.out_dir = "test_output"
|
||||
params.fastqc_args = ''
|
||||
params.publish_dir_mode = "copy"
|
||||
params.intersect_args = '' //'-bed -c -f 0.20'
|
||||
|
||||
include check_output from '../../../../tests/functions/check_process_outputs.nf' // params(params)
|
||||
include INTERSECT_BED from '../main.nf' params(params)
|
||||
|
||||
// Define input channels
|
||||
ch_input_1 = Channel.fromPath('./input_data/A.bed')
|
||||
ch_input_2 = Channel.fromPath('./input_data/B.bed')
|
||||
|
||||
def additional_params_map = [:]
|
||||
|
||||
additional_params_map = [ s: "",
|
||||
f: 0.9 ]
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
INTERSECT_BED(ch_input_1, ch_input_2, additional_params_map)
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
FROM nfcore/base
|
||||
LABEL authors="Jose Espinosa-Carrasco" \
|
||||
description="Docker image containing all requirements for nf-core/modules/bedtools/complementbed"
|
||||
|
||||
COPY environment.yml /
|
||||
RUN conda env create -f /environment.yml && conda clean -a
|
||||
ENV PATH /opt/conda/envs/nf-core-bedtools-merge/bin:$PATH
|
|
@ -1,9 +0,0 @@
|
|||
# You can use this file to create a conda environment for this pipeline:
|
||||
# conda env create -f environment.yml
|
||||
name: nf-core-bedtools-merge
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- bioconda::bedtools=2.29.2
|
|
@ -1,31 +0,0 @@
|
|||
def MODULE = "bedtools_merge"
|
||||
params.publish_dir = MODULE
|
||||
params.publish_results = "default"
|
||||
|
||||
process BEDTOOLS_MERGE {
|
||||
tag { input_file }
|
||||
|
||||
publishDir "${params.out_dir}/${params.publish_dir}",
|
||||
mode: params.publish_dir_mode,
|
||||
saveAs: { filename ->
|
||||
if (params.publish_results == "none") null
|
||||
else filename }
|
||||
|
||||
container "docker.pkg.github.com/nf-core/$MODULE"
|
||||
|
||||
conda "${moduleDir}/environment.yml"
|
||||
|
||||
input:
|
||||
path (input_file)
|
||||
val (bedtools_merge_args)
|
||||
|
||||
output:
|
||||
path "${input_file}.bed", emit: merge
|
||||
path "*.version.txt", emit: version
|
||||
|
||||
script:
|
||||
"""
|
||||
bedtools merge -i ${input_file} ${bedtools_merge_args} > ${input_file}.bed
|
||||
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
|
||||
"""
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
name: bedtools merge
|
||||
description: Combines overlapping genome features of a single file
|
||||
keywords:
|
||||
- merge
|
||||
tools:
|
||||
- bedtools:
|
||||
description: |
|
||||
Bedtools is a software package that provides a toolset for performing genome arithmetic operations.
|
||||
homepage: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
documentation: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
doi: 10.1093/bioinformatics/btq033
|
||||
input:
|
||||
-
|
||||
- input_file:
|
||||
type: file
|
||||
description: Input genomic coordinates file
|
||||
pattern: "*.{bed,gff,vcf,bam}"
|
||||
|
||||
output:
|
||||
-
|
||||
- index:
|
||||
type: stdout,file
|
||||
description:
|
||||
pattern: "stdout,*.{bed}"
|
||||
authors:
|
||||
- "@JoseEspinosa"
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bed/A.bed
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bam/JK2067_downsampled_s0.1.bam
|
|
@ -1,19 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.preview.dsl = 2
|
||||
|
||||
params.out_dir = "test_output"
|
||||
params.fastqc_args = ''
|
||||
params.publish_dir_mode = "copy"
|
||||
params.bedtools_merge_args = '' //''-s -c 6 -o distinct'
|
||||
|
||||
include BEDTOOLS_MERGE from '../main.nf' params(params)
|
||||
|
||||
// Define input channels
|
||||
ch_input = Channel.fromPath('./input_data/A.bed')
|
||||
//ch_input = Channel.fromPath('./input_data/JK2067_downsampled_s0.1.bam')
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
BEDTOOLS_MERGE(ch_input, params.bedtools_merge_args)
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
docker.enabled = true
|
||||
params.outdir = './results'
|
|
@ -1,9 +0,0 @@
|
|||
# You can use this file to create a conda environment for this pipeline:
|
||||
# conda env create -f environment.yml
|
||||
name: nf-core-bedtools-sort
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- bioconda::bedtools=2.29.2
|
|
@ -1,31 +0,0 @@
|
|||
def MODULE = "bedtools_sort"
|
||||
params.publish_dir = MODULE
|
||||
params.publish_results = "default"
|
||||
|
||||
process BEDTOOLS_SORT {
|
||||
tag { input_file }
|
||||
|
||||
publishDir "${params.out_dir}/${params.publish_dir}",
|
||||
mode: params.publish_dir_mode,
|
||||
saveAs: { filename ->
|
||||
if (params.publish_results == "none") null
|
||||
else filename }
|
||||
|
||||
container "docker.pkg.github.com/nf-core/$MODULE"
|
||||
|
||||
conda "${moduleDir}/environment.yml"
|
||||
|
||||
input:
|
||||
path (input_file)
|
||||
val (bedtools_sort_args)
|
||||
|
||||
output:
|
||||
path "${input_file}.bed", emit: sort
|
||||
path "*.version.txt", emit: version
|
||||
|
||||
script:
|
||||
"""
|
||||
bedtools sort -i ${input_file} ${bedtools_sort_args} > ${input_file}.bed
|
||||
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
|
||||
"""
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
name: bedtools sort
|
||||
description: Returns a sorted feature file by chromosome and other criteria
|
||||
keywords:
|
||||
- sort
|
||||
tools:
|
||||
- bedtools:
|
||||
description: |
|
||||
Bedtools is a software package that provides a toolset for performing genome arithmetic operations.
|
||||
homepage: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
documentation: https://bedtools.readthedocs.io/en/latest/index.html
|
||||
doi: 10.1093/bioinformatics/btq033
|
||||
input:
|
||||
-
|
||||
- input_file:
|
||||
type: file
|
||||
description: Input genomic coordinates file
|
||||
pattern: "*.{bed,gff,vcf}"
|
||||
|
||||
output:
|
||||
-
|
||||
- index:
|
||||
type: stdout,file
|
||||
description:
|
||||
pattern: "stdout,*.{bed,gff,vcf}"
|
||||
authors:
|
||||
- "@JoseEspinosa"
|
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/bed/A.bed
|
|
@ -1,18 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.preview.dsl = 2
|
||||
|
||||
params.out_dir = "test_output"
|
||||
params.fastqc_args = ''
|
||||
params.publish_dir_mode = "copy"
|
||||
params.bedtools_sort_args = '' //'-sizeD'
|
||||
|
||||
include BEDTOOLS_SORT from '../main.nf' params(params)
|
||||
|
||||
// Define input channels
|
||||
ch_input = Channel.fromPath('./input_data/A.bed')
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
BEDTOOLS_SORT(ch_input, params.bedtools_sort_args)
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
nextflow.preview.dsl=2
|
||||
params.genome = ''
|
||||
|
||||
process BOWTIE2 {
|
||||
// depending on the genome used one might want/need to adjust the memory settings.
|
||||
// For the E. coli test data this is probably not required
|
||||
|
||||
// label 'bigMem'
|
||||
// label 'multiCore'
|
||||
|
||||
publishDir "$outdir/bowtie2",
|
||||
mode: "copy", overwrite: true
|
||||
|
||||
input:
|
||||
tuple val(name), path(reads)
|
||||
val (outdir)
|
||||
val (bowtie2_args)
|
||||
val (verbose)
|
||||
|
||||
output:
|
||||
path "*bam", emit: bam
|
||||
path "*stats.txt", emit: stats
|
||||
|
||||
script:
|
||||
if (verbose){
|
||||
println ("[MODULE] BOWTIE2 ARGS: " + bowtie2_args)
|
||||
}
|
||||
|
||||
cores = 4
|
||||
|
||||
readString = ""
|
||||
|
||||
// Options we add are
|
||||
bowtie2_options = bowtie2_args
|
||||
bowtie2_options += " --no-unal " // We don't need unaligned reads in the BAM file
|
||||
|
||||
// single-end / paired-end distinction. Might also be handled via params.single_end
|
||||
if (reads instanceof List) {
|
||||
readString = "-1 " + reads[0] + " -2 " + reads[1]
|
||||
}
|
||||
else {
|
||||
readString = "-U " + reads
|
||||
}
|
||||
|
||||
index = params.genome["bowtie2"]
|
||||
bowtie2_name = name + "_" + params.genome["name"]
|
||||
|
||||
"""
|
||||
bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam
|
||||
"""
|
||||
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
name: Bowtie 2
|
||||
description: Ultrafast alignment to reference genome
|
||||
keywords:
|
||||
- Alignment
|
||||
- Short reads
|
||||
- FM Index
|
||||
tools:
|
||||
- bowtie2:
|
||||
description: |
|
||||
Bowtie 2 is an ultrafast and memory-efficient tool for aligning sequencing reads
|
||||
to long reference sequences. It is particularly good at aligning reads of about
|
||||
50 up to 100s or 1,000s of characters, and particularly good at aligning to relatively
|
||||
long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an FM Index to keep
|
||||
its memory footprint small: for the human genome, its memory footprint is typically
|
||||
around 3.2 GB. Bowtie 2 supports gapped, local, and paired-end alignment modes.
|
||||
homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
|
||||
documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
|
||||
input:
|
||||
-
|
||||
- sample_id:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- reads:
|
||||
type: file
|
||||
description: Input FastQ file, or pair of files
|
||||
output:
|
||||
-
|
||||
- report:
|
||||
type: file
|
||||
description: mapping statistics report
|
||||
pattern: "*bowtie2_stats.txt"
|
||||
- alignment:
|
||||
type: file
|
||||
description: alignment file in BAM format
|
||||
pattern: "*bowtie2.bam"
|
||||
authors:
|
||||
- "@FelixKrueger"
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/fasta/E_coli/NC_010473.fa
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
|
@ -1,31 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
nextflow.preview.dsl=2
|
||||
|
||||
params.outdir = "."
|
||||
params.genome = ""
|
||||
params.bowtie2_args = ''
|
||||
// Bowtie2 arguments should be supplied in the following format to work:
|
||||
// --bowtie2_args="--score-min L,0,-0.8"
|
||||
|
||||
params.verbose = false
|
||||
|
||||
if (params.verbose){
|
||||
println ("[WORKFLOW] BOWTIE2 ARGS: " + params.bowtie2_args)
|
||||
}
|
||||
|
||||
// for other genomes this needs to be handled somehow to return all possible genomes
|
||||
genomeValues = ["name" : params.genome]
|
||||
genomeValues["bowtie2"] = "/bi/home/fkrueger/VersionControl/nf-core-modules/test-datasets/indices/bowtie2/E_coli/${params.genome}";
|
||||
|
||||
include '../main.nf' params(genome: genomeValues)
|
||||
|
||||
ch_read_files = Channel
|
||||
.fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
|
||||
// .view() // to check whether the input channel works
|
||||
|
||||
workflow {
|
||||
|
||||
main:
|
||||
BOWTIE2(ch_read_files, params.outdir, params.bowtie2_args, params.verbose)
|
||||
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
docker.enabled = true
|
||||
params.outdir = './results'
|
Binary file not shown.
|
@ -1,15 +0,0 @@
|
|||
10000 reads; of these:
|
||||
10000 (100.00%) were paired; of these:
|
||||
893 (8.93%) aligned concordantly 0 times
|
||||
8474 (84.74%) aligned concordantly exactly 1 time
|
||||
633 (6.33%) aligned concordantly >1 times
|
||||
----
|
||||
893 pairs aligned concordantly 0 times; of these:
|
||||
815 (91.27%) aligned discordantly 1 time
|
||||
----
|
||||
78 pairs aligned 0 times concordantly or discordantly; of these:
|
||||
156 mates make up the pairs; of these:
|
||||
0 (0.00%) aligned 0 times
|
||||
1 (0.64%) aligned exactly 1 time
|
||||
155 (99.36%) aligned >1 times
|
||||
100.00% overall alignment rate
|
Binary file not shown.
|
@ -1,15 +0,0 @@
|
|||
9979 reads; of these:
|
||||
9979 (100.00%) were paired; of these:
|
||||
3584 (35.92%) aligned concordantly 0 times
|
||||
3705 (37.13%) aligned concordantly exactly 1 time
|
||||
2690 (26.96%) aligned concordantly >1 times
|
||||
----
|
||||
3584 pairs aligned concordantly 0 times; of these:
|
||||
886 (24.72%) aligned discordantly 1 time
|
||||
----
|
||||
2698 pairs aligned 0 times concordantly or discordantly; of these:
|
||||
5396 mates make up the pairs; of these:
|
||||
2282 (42.29%) aligned 0 times
|
||||
1467 (27.19%) aligned exactly 1 time
|
||||
1647 (30.52%) aligned >1 times
|
||||
88.57% overall alignment rate
|
|
@ -1,41 +0,0 @@
|
|||
process cutadapt {
|
||||
tag "${sample_id}"
|
||||
|
||||
container 'quay.io/biocontainers/cutadapt:1.16--py27_1'
|
||||
|
||||
input:
|
||||
tuple val(sample_id), path(reads)
|
||||
|
||||
output:
|
||||
tuple sample_id, path("trimmed_*.fastq")
|
||||
|
||||
script:
|
||||
forward_fq = "trimmed_1.fastq"
|
||||
reverse_fq = "trimmed_2.fastq"
|
||||
|
||||
|
||||
if (params.single_end) {
|
||||
processing = """
|
||||
cutadapt \
|
||||
-j ${task.cpus} \
|
||||
-q $params.cutadapt_min_quality \
|
||||
--minimum-length $params.cutadapt_min_length \
|
||||
--output ${forward_fq} \
|
||||
${reads}
|
||||
"""
|
||||
} else {
|
||||
processing = """
|
||||
cutadapt \
|
||||
-j ${task.cpus} \
|
||||
-q $params.cutadapt_min_quality \
|
||||
--minimum-length $params.cutadapt_min_length \
|
||||
--pair-filter=any \
|
||||
--output ${forward_fq} \
|
||||
--paired-output ${reverse_fq} ${reads}
|
||||
"""
|
||||
}
|
||||
|
||||
version = "cutadapt --version &> v_cutadapt.txt"
|
||||
|
||||
return processing + version
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
name: Cutadapt
|
||||
description: cutadapt removes adapter sequences from high-throughput sequencing reads
|
||||
keywords:
|
||||
- Quality Control
|
||||
- QC
|
||||
- Adapters
|
||||
tools:
|
||||
- cutadapt:
|
||||
description: |
|
||||
Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence
|
||||
from your high-throughput sequencing reads.
|
||||
|
||||
Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’
|
||||
sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads
|
||||
start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but
|
||||
often you don’t want them to be in your reads.
|
||||
homepage: https://cutadapt.readthedocs.io/en/stable/
|
||||
documentation: https://cutadapt.readthedocs.io/en/stable/
|
||||
input:
|
||||
-
|
||||
- sample_id:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- reads:
|
||||
type: file
|
||||
description: Input FastQ file, or pair of files
|
||||
output:
|
||||
-
|
||||
- sample_id:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- reads:
|
||||
type: file
|
||||
description: trimmed FastQ file, or pair of files
|
||||
authors:
|
||||
- "@piotr-faba-ardigen"
|
|
@ -1,20 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
nextflow.preview.dsl = 2
|
||||
include '../main.nf' params(params)
|
||||
|
||||
// Define input channels
|
||||
input_fastqs = Channel.fromFilePairs('../../../test-datasets/tools/cutadapt/input/*_{1,2}.fastq' )
|
||||
|
||||
if(params.single_end){
|
||||
input_fastqs = Channel.from([
|
||||
['SRR4238351', '../../../test-datasets/tools/cutadapt/input/SRR4238351_subsamp.fastq.gz'],
|
||||
['SRR4238355', '../../../test-datasets/tools/cutadapt/input/SRR4238355_subsamp.fastq.gz'],
|
||||
['SRR4238359', '../../../test-datasets/tools/cutadapt/input/SRR4238359_subsamp.fastq.gz'],
|
||||
['SRR4238379', '../../../test-datasets/tools/cutadapt/input/SRR4238379_subsamp.fastq.gz']
|
||||
]).map { row -> [ row[0], [ file(row[1]) ] ] }
|
||||
}
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
cutadapt(input_fastqs)
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
docker.enabled = true
|
||||
params.outdir = './results'
|
||||
|
||||
params{
|
||||
// Preprocessing options
|
||||
cutadapt_min_length = 40
|
||||
cutadapt_min_quality = 25
|
||||
single_end = false
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
nextflow.preview.dsl=2
|
||||
params.genome = ''
|
||||
|
||||
process HISAT2 {
|
||||
// depending on the genome used one might want/need to adjust the memory settings.
|
||||
// For the E. coli test data this is probably not required
|
||||
// label 'bigMem'
|
||||
// label 'multiCore'
|
||||
|
||||
publishDir "$outdir/hisat2",
|
||||
mode: "copy", overwrite: true
|
||||
|
||||
input:
|
||||
tuple val(name), path(reads)
|
||||
val outdir
|
||||
val hisat2_args
|
||||
val verbose
|
||||
|
||||
output:
|
||||
path "*bam", emit: bam
|
||||
path "*stats.txt", emit: stats
|
||||
|
||||
script:
|
||||
|
||||
if (verbose){
|
||||
println ("[MODULE] HISAT2 ARGS: " + hisat2_args)
|
||||
}
|
||||
|
||||
cores = 4
|
||||
readString = ""
|
||||
hisat_options = hisat2_args
|
||||
|
||||
// Options we add are
|
||||
hisat_options = hisat_options + " --no-unal --no-softclip "
|
||||
|
||||
if (reads instanceof List) {
|
||||
readString = "-1 "+reads[0]+" -2 "+reads[1]
|
||||
hisat_options = hisat_options + " --no-mixed --no-discordant"
|
||||
}
|
||||
else {
|
||||
readString = "-U "+reads
|
||||
}
|
||||
index = params.genome["hisat2"]
|
||||
|
||||
splices = ''
|
||||
if (params.genome.containsKey("hisat2_splices")){
|
||||
splices = " --known-splicesite-infile " + params.genome["hisat2_splices"]
|
||||
}
|
||||
else{
|
||||
println ("No key 'hisat2_splices' was supplied. Skipping...")
|
||||
}
|
||||
hisat_name = name + "_" + params.genome["name"]
|
||||
|
||||
"""
|
||||
hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${hisat_name}_hisat2.bam
|
||||
"""
|
||||
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
name: HISAT2
|
||||
description: Graph-based alignment of next generation sequencing reads to a population of genomes
|
||||
keywords:
|
||||
- Alignment
|
||||
- Short reads
|
||||
- graph FM Index (GFM)
|
||||
- RNA-seq
|
||||
tools:
|
||||
- hisat2:
|
||||
description: |
|
||||
HISAT2 is a fast and sensitive alignment program for mapping next-generation
|
||||
sequencing reads (whole-genome, transcriptome, and exome sequencing data)
|
||||
against the general human population (as well as against a single reference genome).
|
||||
Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
|
||||
graph FM index (GFM).
|
||||
homepage: http://daehwankimlab.github.io/hisat2/
|
||||
documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
|
||||
input:
|
||||
-
|
||||
- sample_id:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- reads:
|
||||
type: file
|
||||
description: Input FastQ file, or pair of files
|
||||
output:
|
||||
-
|
||||
- report:
|
||||
type: file
|
||||
description: mapping statistics report
|
||||
pattern: "*hisat2_stats.txt"
|
||||
- alignment:
|
||||
type: file
|
||||
description: alignment file in BAM format
|
||||
pattern: "*hisat2.bam"
|
||||
authors:
|
||||
- "@FelixKrueger"
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1 +0,0 @@
|
|||
../../../../../tests/data/fasta/E_coli/NC_010473.fa
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
|
|
@ -1,34 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
nextflow.preview.dsl=2
|
||||
|
||||
params.outdir = "."
|
||||
params.genome = ""
|
||||
params.hisat2_args = ''
|
||||
// HISAT2 arguments should be supplied in the following format to work:
|
||||
// --hisat2_args="--score-min L,0,-0.8"
|
||||
|
||||
params.verbose = false
|
||||
|
||||
if (params.verbose){
|
||||
println ("[WORKFLOW] HISAT2 ARGS ARE: " + params.hisat2_args)
|
||||
}
|
||||
// for other genomes this needs to be handled somehow to return all possible genomes
|
||||
genomeValues = ["name" : params.genome]
|
||||
genomeValues["hisat2"] = "/bi/home/fkrueger/VersionControl/nf-core-modules/test-datasets/indices/hisat2/E_coli/${params.genome}";
|
||||
|
||||
include '../main.nf' params(genome: genomeValues)
|
||||
|
||||
ch_read_files = Channel
|
||||
.fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
|
||||
// .view() // to check whether the input channel works
|
||||
|
||||
workflow {
|
||||
|
||||
main:
|
||||
HISAT2(ch_read_files, params.outdir, params.hisat2_args, params.verbose)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
// docker.enabled = true
|
||||
params.outdir = './results'
|
Binary file not shown.
|
@ -1,6 +0,0 @@
|
|||
10000 reads; of these:
|
||||
10000 (100.00%) were paired; of these:
|
||||
823 (8.23%) aligned concordantly 0 times
|
||||
8583 (85.83%) aligned concordantly exactly 1 time
|
||||
594 (5.94%) aligned concordantly >1 times
|
||||
91.77% overall alignment rate
|
|
@ -1,31 +0,0 @@
|
|||
nextflow.preview.dsl=2
|
||||
|
||||
process MULTIQC {
|
||||
|
||||
// tag "FastQC - $sample_id"
|
||||
|
||||
publishDir "${outdir}/multiqc",
|
||||
mode: "copy", overwrite: true
|
||||
|
||||
input:
|
||||
path file
|
||||
val outdir
|
||||
val multiqc_args
|
||||
// multiqc_args are best passed into the workflow in the following manner:
|
||||
// --multiqc_args="--exclude STAR --title custom_report_title"
|
||||
val verbose
|
||||
|
||||
output:
|
||||
path "*html", emit: html
|
||||
|
||||
script:
|
||||
|
||||
if (verbose){
|
||||
println ("[MODULE] MULTIQC ARGS: " + multiqc_args)
|
||||
}
|
||||
|
||||
"""
|
||||
multiqc $multiqc_args -x work .
|
||||
"""
|
||||
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
name: MultiQC
|
||||
description: Aggregate results from bioinformatics analyses across many samples into a single report
|
||||
keywords:
|
||||
- QC
|
||||
- bioinformatics tools
|
||||
- Beautiful stand-alone HTML report
|
||||
tools:
|
||||
- multiqc:
|
||||
description: |
|
||||
MultiQC searches a given directory for analysis logs and compiles a HTML report.
|
||||
It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.
|
||||
homepage: https://multiqc.info/
|
||||
documentation: https://multiqc.info/docs/
|
||||
input:
|
||||
-
|
||||
- reads:
|
||||
type: file
|
||||
description: List of report file(s)
|
||||
output:
|
||||
-
|
||||
- multiqc_report:
|
||||
type: file
|
||||
description: MultiQC report
|
||||
pattern: "*multiqc*.html"
|
||||
authors:
|
||||
- "@FelixKrueger"
|
|
@ -1 +0,0 @@
|
|||
../../../../bowtie2/test/output/test_GRCm38_bowtie2_stats.txt
|
|
@ -1 +0,0 @@
|
|||
../../../../fastq_screen/test/output/test_R1_screen.txt
|
|
@ -1 +0,0 @@
|
|||
../../../../fastqc/test/output/test_R1_fastqc.zip
|
|
@ -1 +0,0 @@
|
|||
../../../../fastqc/test/output/test_R1_val_1_fastqc.zip
|
|
@ -1 +0,0 @@
|
|||
../../../../fastqc/test/output/test_R2_fastqc.zip
|
|
@ -1 +0,0 @@
|
|||
../../../../fastqc/test/output/test_R2_val_2_fastqc.zip
|
|
@ -1 +0,0 @@
|
|||
../../../../hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt
|
|
@ -1 +0,0 @@
|
|||
../../../../trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt
|
|
@ -1 +0,0 @@
|
|||
../../../../trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt
|
|
@ -1,44 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
nextflow.preview.dsl=2
|
||||
|
||||
params.outdir = "."
|
||||
params.verbose = false
|
||||
params.multiqc_args = ''
|
||||
|
||||
// include '../../../tests/functions/check_process_outputs.nf'
|
||||
include '../main.nf'
|
||||
|
||||
if (params.verbose){
|
||||
println ("[WORKFLOW] MULTIQC ARGS: " + params.multiqc_args)
|
||||
}
|
||||
|
||||
multiqc_ch = Channel
|
||||
.fromPath( ['../../../test-datasets/*trimming_report.txt','../../../test-datasets/*fastqc.zip','../../../test-datasets/*screen.txt','../../../test-datasets/*bowtie2_stats.txt'] )
|
||||
.collect() // collect() flattens all channels to single list
|
||||
// .view() // view the files in the channel
|
||||
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
|
||||
main:
|
||||
// This is an example workflow for real reads aligned with Bowtie2. Just for illustration purposes
|
||||
|
||||
// FASTQC (file_ch, params.outdir, params.fastqc_args, params.verbose)
|
||||
// FASTQ_SCREEN (file_ch, params.outdir, params.fastq_screen_args, params.verbose)
|
||||
// TRIM_GALORE (file_ch, params.outdir, params.trim_galore_args, params.verbose)
|
||||
// FASTQC2 (TRIM_GALORE.out.reads, params.outdir, params.fastqc_args, params.verbose)
|
||||
// BOWTIE2 (TRIM_GALORE.out.reads, params.outdir, params.bowtie2_args, params.verbose)
|
||||
|
||||
// merging channels for MultiQC
|
||||
// multiqc_ch = FASTQC.out.report.mix(
|
||||
// TRIM_GALORE.out.report,
|
||||
// FASTQ_SCREEN.out.report,
|
||||
// FASTQC2.out.report,
|
||||
// BOWTIE2.out.stats,
|
||||
// ).collect()
|
||||
|
||||
MULTIQC (multiqc_ch, params.outdir, params.multiqc_args, params.verbose)
|
||||
|
||||
// .check_output() TODO
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
// docker.enabled = true
|
||||
params.outdir = './results'
|
File diff suppressed because one or more lines are too long
|
@ -1,16 +0,0 @@
|
|||
process samtools_faidx {
|
||||
tag "$fasta"
|
||||
|
||||
container 'quay.io/biocontainers/samtools:1.9--h10a08f8_12'
|
||||
|
||||
input:
|
||||
path fasta
|
||||
|
||||
output:
|
||||
path "${fasta}.fai"
|
||||
|
||||
script:
|
||||
"""
|
||||
samtools faidx ${fasta}
|
||||
"""
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
name: samtools faidx
|
||||
description: index a fasta file
|
||||
keywords:
|
||||
- faidx
|
||||
tools:
|
||||
- samtools:
|
||||
description: |
|
||||
SAMtools is a set of utilities for interacting with and post-processing
|
||||
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
|
||||
These files are generated as output by short read aligners like BWA.
|
||||
homepage: http://www.htslib.org/
|
||||
documentation: http://www.htslib.org/doc/samtools.html
|
||||
doi: 10.1093/bioinformatics/btp352
|
||||
input:
|
||||
-
|
||||
- input:
|
||||
type: file
|
||||
description: Input fasta file
|
||||
pattern: "*.{fasta,fa}"
|
||||
output:
|
||||
-
|
||||
- faidx:
|
||||
type: file
|
||||
description: samtools index fasta file
|
||||
pattern: "*.{fasta,fa}.fai"
|
||||
authors:
|
||||
- "@maxulysse"
|
|
@ -1,13 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
nextflow.preview.dsl = 2
|
||||
include '../../../tests/functions/check_process_outputs.nf' params(params)
|
||||
include '../main.nf' params(params)
|
||||
|
||||
// Define input channels
|
||||
input = '../../../test-datasets/tools/bwa/index/input/reference.fasta'
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
samtools_faidx(input)
|
||||
// .check_output()
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
docker.enabled = true
|
||||
params.outdir = './results'
|
|
@ -1,8 +0,0 @@
|
|||
FROM nfcore/base:1.7
|
||||
LABEL authors="chris.cheshire@crick.ac.uk" \
|
||||
description="Docker image containing all requirements for the nf-core umi_tools module"
|
||||
|
||||
# Install conda packages
|
||||
COPY environment.yml /
|
||||
RUN conda env create -f /environment.yml && conda clean -a
|
||||
ENV PATH /opt/conda/envs/nfcore-module-umitools/bin:$PATH
|
|
@ -1,10 +0,0 @@
|
|||
# This file creates a conda environment for the umi_tools module
|
||||
# conda env create -f environment.yml
|
||||
name: nfcore-module-umitools
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- umi_tools=1.0.1
|
||||
- samtools=1.1.0
|
|
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
// Specify DSL2
|
||||
nextflow.preview.dsl = 2
|
||||
|
||||
// Process definition
|
||||
process umitools_dedup {
|
||||
publishDir "${params.outdir}/umitools/dedup",
|
||||
mode: "copy", overwrite: true
|
||||
|
||||
container 'luslab/nf-modules-umitools:latest'
|
||||
|
||||
input:
|
||||
tuple val(sample_id), path(bam)
|
||||
|
||||
output:
|
||||
tuple val(sample_id), path("${sample_id}.dedup.bam"), emit: dedupBam
|
||||
tuple val(sample_id), path("${sample_id}.dedup.bam.bai"), emit: dedupBai
|
||||
path "*.dedup.log", emit: report
|
||||
|
||||
script:
|
||||
|
||||
// Init
|
||||
args = "--log=${sample_id}.dedup.log"
|
||||
|
||||
// Check main args string exists and strip whitespace
|
||||
if(params.umitools_dedup_args) {
|
||||
ext_args = params.umitools_dedup_args
|
||||
args += " " + ext_args.trim()
|
||||
}
|
||||
|
||||
// Construct the command line
|
||||
dedup_command = "umi_tools dedup ${args} -I ${bam[0]} -S ${sample_id}.dedup.bam --output-stats=${sample_id}"
|
||||
|
||||
// Log
|
||||
if (params.verbose){
|
||||
println ("[MODULE] umi_tools/dedup command: " + dedup_command)
|
||||
}
|
||||
|
||||
//SHELL
|
||||
"""
|
||||
${dedup_command}
|
||||
samtools index ${sample_id}.dedup.bam
|
||||
"""
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue