Merge pull request #140 from drpatelh/containers

Add build ids for all Conda packages defined in modules
This commit is contained in:
Harshil Patel 2021-02-03 15:49:45 +00:00 committed by GitHub
commit bffe3bb7b5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
174 changed files with 66 additions and 7590 deletions

View file

@ -25,7 +25,7 @@ jobs:
matrix:
nxf_version: ['20.11.0-edge']
tags: ['${{ fromJson(needs.changes.outputs.modules) }}']
profile: ['docker', 'singularity', 'conda']
profile: ['docker'] ## 'conda', 'singularity'
env:
NXF_ANSI_LOG: false
steps:
@ -72,8 +72,11 @@ jobs:
uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
channels: defaults, bioconda, conda-forge
channels: conda-forge,bioconda,defaults
python-version: ${{ matrix.python-version }}
- name: Conda clean
if: matrix.profile == 'conda'
run: conda clean -a
# Test the module
- name: Run pytest-workflow

View file

@ -1,7 +0,0 @@
FROM nfcore/base
LABEL authors="Jose Espinosa-Carrasco" \
description="Docker image containing all requirements for nf-core/modules/bedtools/complement"
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a
ENV PATH /opt/conda/envs/nf-core-bedtools-complement/bin:$PATH

View file

@ -1,9 +0,0 @@
# You can use this file to create a conda environment for this pipeline:
# conda env create -f environment.yml
name: nf-core-bedtools-complement
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::bedtools=2.29.2

View file

@ -1,32 +0,0 @@
// Module identifier: used as the default publish sub-directory and as the
// last path component of the container image name.
def MODULE = "bedtools_complement"
params.publish_dir = MODULE
params.publish_results = "default"
// Runs `bedtools complement` on one input file against a genome sizes file and
// records the bedtools version string.
//
// Inputs:
//   input_file               - file passed to `-i`
//   fasta_sizes              - file passed to `-g`
//   bedtools_complement_args - extra command-line text, interpolated verbatim
// Outputs:
//   complement - "<input_file>.bed", the redirected stdout of the command
//   version    - any "*.version.txt" file (bedtools.version.txt is written below)
process BEDTOOLS_COMPLEMENT {
tag {input_file}
// Publish into params.out_dir/params.publish_dir. The saveAs closure returns
// null for every file when params.publish_results is "none", otherwise it
// returns the file name unchanged.
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
container "docker.pkg.github.com/nf-core/$MODULE"
conda "${moduleDir}/environment.yml"
input:
path (input_file)
path (fasta_sizes)
val (bedtools_complement_args)
output:
path "${input_file}.bed", emit: complement
path "*.version.txt", emit: version
script:
// The sed expression prints only the part of the `bedtools --version` line
// that runs from its last "v" to the end of the line.
"""
bedtools complement -i ${input_file} -g ${fasta_sizes} ${bedtools_complement_args} > ${input_file}.bed
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
"""
}

View file

@ -1,30 +0,0 @@
name: bedtools complement
description: Returns all intervals in a genome that are not covered by at least one interval in the input BED/GFF/VCF file
keywords:
- complement
tools:
- bedtools:
description: |
Bedtools is a software package that provides a toolset to perform genome arithmetic operations.
homepage: https://bedtools.readthedocs.io/en/latest/index.html
documentation: https://bedtools.readthedocs.io/en/latest/index.html
doi: 10.1093/bioinformatics/btq033
input:
-
- input_file:
type: file
description: Input genomic coordinates file
pattern: "*.{bed,gff,vcf}"
- fasta_sizes:
type: file
description: Genome chromosome sizes
pattern: "*.{txt,sizes}"
output:
-
- index:
type: stdout,file
description:
pattern: "stdout,*.{bed,gff,vcf}"
authors:
- "@JoseEspinosa"

View file

@ -1 +0,0 @@
../../../../../tests/data/bed/A.bed

View file

@ -1 +0,0 @@
../../../../../tests/data/bed/genome.sizes

View file

@ -1,19 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
params.out_dir = "test_output"
params.fastqc_args = ''
params.publish_dir_mode = "copy"
params.bedtools_complement_args = ''
include BEDTOOLS_COMPLEMENT from '../main.nf' params(params)
// Define input channels
ch_input = Channel.fromPath('./input_data/A.bed')
chrom_sizes = Channel.fromPath('./input_data/genome.sizes')
// Run the workflow
workflow {
BEDTOOLS_COMPLEMENT(ch_input, chrom_sizes, params.bedtools_complement_args)
}

View file

@ -1,7 +0,0 @@
FROM nfcore/base
LABEL authors="Jose Espinosa-Carrasco" \
description="Docker image containing all requirements for nf-core/modules/bedtools/genomecov"
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a
ENV PATH /opt/conda/envs/nf-core-bedtools-genomecov/bin:$PATH

View file

@ -1,9 +0,0 @@
# You can use this file to create a conda environment for this pipeline:
# conda env create -f environment.yml
name: nf-core-bedtools-genomecov
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::bedtools=2.29.2

View file

@ -1,32 +0,0 @@
// Module identifier: used as the default publish sub-directory and as the
// last path component of the container image name.
def MODULE = "bedtools_genomecov"
params.publish_dir = MODULE
params.publish_results = "default"
// Runs `bedtools genomecov` on a BAM file and records the bedtools version string.
//
// Inputs:
//   bam                     - file passed to `-ibam`
//   chrom_sizes             - file passed to `-g`
//   bedtools_genomecov_args - extra command-line text, interpolated verbatim
// Outputs:
//   coverage - "<bam>.bed", the redirected stdout of the command
//   version  - any "*.version.txt" file (bedtools.version.txt is written below)
process BEDTOOLS_GENOMECOV {
tag {bam}
// Publish into params.out_dir/params.publish_dir. The saveAs closure returns
// null for every file when params.publish_results is "none", otherwise it
// returns the file name unchanged.
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
container "docker.pkg.github.com/nf-core/$MODULE"
conda "${moduleDir}/environment.yml"
input:
path (bam)
path (chrom_sizes)
val (bedtools_genomecov_args)
output:
path "${bam}.bed", emit: coverage
path "*.version.txt", emit: version
script:
// The sed expression prints only the part of the `bedtools --version` line
// that runs from its last "v" to the end of the line.
"""
bedtools genomecov -ibam ${bam} -g ${chrom_sizes} ${bedtools_genomecov_args} > ${bam}.bed
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
"""
}

View file

@ -1,30 +0,0 @@
name: bedtools genomecov
description: Returns feature coverage for a given genome in different formats
keywords:
- genomecov
tools:
- bedtools:
description: |
Bedtools is a software package that provides a toolset to perform genome arithmetic operations.
homepage: https://bedtools.readthedocs.io/en/latest/index.html
documentation: https://bedtools.readthedocs.io/en/latest/index.html
doi: 10.1093/bioinformatics/btq033
input:
-
- input_file:
type: file
description: Input genomic coordinates file
pattern: "*.{bam}"
- chrom_sizes:
type: file
description: Genome chromosome sizes
pattern: "*.{txt,sizes}"
output:
-
- index:
type: stdout,file
description:
pattern: "stdout,*.{bed,bedGraph}"
authors:
- "@JoseEspinosa"

View file

@ -1 +0,0 @@
../../../../../tests/data/bam/JK2067_downsampled_s0.1.bam

View file

@ -1 +0,0 @@
../../../../../tests/data/bed/genome.sizes

View file

@ -1,19 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
params.out_dir = "test_output"
params.fastqc_args = ''
params.publish_dir_mode = "copy"
params.bedtools_genomecov_args = '' //'-bg'
include BEDTOOLS_GENOMECOV from '../main.nf' params(params)
// Define input channels
ch_input = Channel.fromPath('./input_data/JK2067_downsampled_s0.1.bam')
chrom_sizes = Channel.fromPath('./input_data/genome.sizes')
// Run the workflow
workflow {
BEDTOOLS_GENOMECOV(ch_input, chrom_sizes, params.bedtools_genomecov_args)
}

View file

@ -1,7 +0,0 @@
# Image for the nf-core bedtools/intersect module, built on the shared nf-core base image.
FROM nfcore/base
LABEL authors="Jose Espinosa-Carrasco" \
description="Docker image containing all requirements for nf-core/modules/bedtools/intersect"
# Create the conda environment described by environment.yml, then drop conda's caches.
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a
# The environment directory is named after the `name:` field of environment.yml
# (nf-core-bedtools-intersect); any other name leaves bedtools off the PATH.
ENV PATH /opt/conda/envs/nf-core-bedtools-intersect/bin:$PATH

View file

@ -1,9 +0,0 @@
# You can use this file to create a conda environment for this pipeline:
# conda env create -f environment.yml
name: nf-core-bedtools-intersect
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::bedtools=2.29.2

View file

@ -1,36 +0,0 @@
// Module identifier: used as the default publish sub-directory and as the
// last path component of the container image name.
def MODULE = "bedtools_intersect"
params.publish_dir = MODULE
params.publish_results = "default"
// Runs `bedtools intersect -a <input_file_1> -b <input_file_2>` and records the
// bedtools version string.
//
// Inputs:
//   input_file_1      - file passed to `-a`
//   input_file_2      - file passed to `-b`
//   intersectbed_args - collection of key/value entries (the code reads
//                       `it.key` and `it.value`), each rendered as "-<key> <value>"
// Outputs:
//   intersect - "<baseName of file 1>_i_<baseName of file 2>.bed"
//   version   - any "*.version.txt" file (bedtools.version.txt is written below)
process INTERSECT_BED {
tag "$input_file_1-$input_file_2"
// Publish into params.out_dir/params.publish_dir. The saveAs closure returns
// null for every file when params.publish_results is "none", otherwise it
// returns the file name unchanged.
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
container "docker.pkg.github.com/nf-core/$MODULE"
conda "${moduleDir}/environment.yml"
input:
path (input_file_1)
path (input_file_2)
val (intersectbed_args)
output:
path "${input_file_1.baseName}_i_${input_file_2.baseName}.bed", emit: intersect
path "*.version.txt", emit: version
script:
// Render every entry as "-key value" and join the pieces with single spaces.
// An entry whose value is an empty string therefore yields "-key " (a bare flag).
def params_string = intersectbed_args.collect {
/-$it.key $it.value/
} join " "
// The sed expression prints only the part of the `bedtools --version` line
// that runs from its last "v" to the end of the line.
"""
bedtools intersect -a ${input_file_1} -b ${input_file_2} ${params_string} > ${input_file_1.baseName}_i_${input_file_2.baseName}.bed
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
"""
}

View file

@ -1,30 +0,0 @@
name: bedtools intersect
description: Returns the overlapping features between two sets of genomics features
keywords:
- bedtools intersect
tools:
- bedtools:
description: |
Bedtools is a software package that provides a toolset to perform genome arithmetic operations.
homepage: https://bedtools.readthedocs.io/en/latest/index.html
documentation: https://bedtools.readthedocs.io/en/latest/index.html
doi: 10.1093/bioinformatics/btq033
input:
-
- input_file_1:
type: file
description: Input genomic coordinates file
pattern: "*.{bam,bed,BED,gff,vcf}"
- input_file_2:
type: file
description: Input genomic coordinates file
pattern: "*.{bam,bed,BED,gff,vcf}"
output:
-
- index:
type: stdout,file
description:
pattern: "stdout,*.{bed,BED}"
authors:
- "@JoseEspinosa"

View file

@ -1 +0,0 @@
../../../../../tests/data/bed/A.bed

View file

@ -1 +0,0 @@
../../../../../tests/data/bed/B.bed

View file

@ -1,24 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
params.out_dir = "test_output"
params.fastqc_args = ''
params.publish_dir_mode = "copy"
params.intersect_args = '' //'-bed -c -f 0.20'
include check_output from '../../../../tests/functions/check_process_outputs.nf' // params(params)
include INTERSECT_BED from '../main.nf' params(params)
// Define input channels
ch_input_1 = Channel.fromPath('./input_data/A.bed')
ch_input_2 = Channel.fromPath('./input_data/B.bed')
def additional_params_map = [:]
additional_params_map = [ s: "",
f: 0.9 ]
// Run the workflow
workflow {
INTERSECT_BED(ch_input_1, ch_input_2, additional_params_map)
}

View file

@ -1,7 +0,0 @@
# Image for the nf-core bedtools/merge module, built on the shared nf-core base image.
FROM nfcore/base
LABEL authors="Jose Espinosa-Carrasco" \
description="Docker image containing all requirements for nf-core/modules/bedtools/merge"
# Create the conda environment described by environment.yml, then drop conda's caches.
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a
# Put the environment's executables first on the PATH.
ENV PATH /opt/conda/envs/nf-core-bedtools-merge/bin:$PATH

View file

@ -1,9 +0,0 @@
# You can use this file to create a conda environment for this pipeline:
# conda env create -f environment.yml
name: nf-core-bedtools-merge
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::bedtools=2.29.2

View file

@ -1,31 +0,0 @@
// Module identifier: used as the default publish sub-directory and as the
// last path component of the container image name.
def MODULE = "bedtools_merge"
params.publish_dir = MODULE
params.publish_results = "default"
// Runs `bedtools merge` on one input file and records the bedtools version string.
//
// Inputs:
//   input_file          - file passed to `-i`
//   bedtools_merge_args - extra command-line text, interpolated verbatim
// Outputs:
//   merge   - "<input_file>.bed", the redirected stdout of the command
//   version - any "*.version.txt" file (bedtools.version.txt is written below)
process BEDTOOLS_MERGE {
tag { input_file }
// Publish into params.out_dir/params.publish_dir. The saveAs closure returns
// null for every file when params.publish_results is "none", otherwise it
// returns the file name unchanged.
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
container "docker.pkg.github.com/nf-core/$MODULE"
conda "${moduleDir}/environment.yml"
input:
path (input_file)
val (bedtools_merge_args)
output:
path "${input_file}.bed", emit: merge
path "*.version.txt", emit: version
script:
// The sed expression prints only the part of the `bedtools --version` line
// that runs from its last "v" to the end of the line.
"""
bedtools merge -i ${input_file} ${bedtools_merge_args} > ${input_file}.bed
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
"""
}

View file

@ -1,26 +0,0 @@
name: bedtools merge
description: Combines overlapping genome features of a single file
keywords:
- merge
tools:
- bedtools:
description: |
Bedtools is a software package that provides a toolset to perform genome arithmetic operations.
homepage: https://bedtools.readthedocs.io/en/latest/index.html
documentation: https://bedtools.readthedocs.io/en/latest/index.html
doi: 10.1093/bioinformatics/btq033
input:
-
- input_file:
type: file
description: Input genomic coordinates file
pattern: "*.{bed,gff,vcf,bam}"
output:
-
- index:
type: stdout,file
description:
pattern: "stdout,*.{bed}"
authors:
- "@JoseEspinosa"

View file

@ -1 +0,0 @@
../../../../../tests/data/bed/A.bed

View file

@ -1 +0,0 @@
../../../../../tests/data/bam/JK2067_downsampled_s0.1.bam

View file

@ -1,19 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
params.out_dir = "test_output"
params.fastqc_args = ''
params.publish_dir_mode = "copy"
params.bedtools_merge_args = '' //''-s -c 6 -o distinct'
include BEDTOOLS_MERGE from '../main.nf' params(params)
// Define input channels
ch_input = Channel.fromPath('./input_data/A.bed')
//ch_input = Channel.fromPath('./input_data/JK2067_downsampled_s0.1.bam')
// Run the workflow
workflow {
BEDTOOLS_MERGE(ch_input, params.bedtools_merge_args)
}

View file

@ -1,2 +0,0 @@
docker.enabled = true
params.outdir = './results'

View file

@ -1,9 +0,0 @@
# You can use this file to create a conda environment for this pipeline:
# conda env create -f environment.yml
name: nf-core-bedtools-sort
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::bedtools=2.29.2

View file

@ -1,31 +0,0 @@
// Module identifier: used as the default publish sub-directory and as the
// last path component of the container image name.
def MODULE = "bedtools_sort"
params.publish_dir = MODULE
params.publish_results = "default"
// Runs `bedtools sort` on one input file and records the bedtools version string.
//
// Inputs:
//   input_file         - file passed to `-i`
//   bedtools_sort_args - extra command-line text, interpolated verbatim
// Outputs:
//   sort    - "<input_file>.bed", the redirected stdout of the command
//   version - any "*.version.txt" file (bedtools.version.txt is written below)
process BEDTOOLS_SORT {
tag { input_file }
// Publish into params.out_dir/params.publish_dir. The saveAs closure returns
// null for every file when params.publish_results is "none", otherwise it
// returns the file name unchanged.
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
container "docker.pkg.github.com/nf-core/$MODULE"
conda "${moduleDir}/environment.yml"
input:
path (input_file)
val (bedtools_sort_args)
output:
path "${input_file}.bed", emit: sort
path "*.version.txt", emit: version
script:
// The sed expression prints only the part of the `bedtools --version` line
// that runs from its last "v" to the end of the line.
"""
bedtools sort -i ${input_file} ${bedtools_sort_args} > ${input_file}.bed
bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
"""
}

View file

@ -1,26 +0,0 @@
name: bedtools sort
description: Returns a sorted feature file by chromosome and other criteria
keywords:
- sort
tools:
- bedtools:
description: |
Bedtools is a software package that provides a toolset to perform genome arithmetic operations.
homepage: https://bedtools.readthedocs.io/en/latest/index.html
documentation: https://bedtools.readthedocs.io/en/latest/index.html
doi: 10.1093/bioinformatics/btq033
input:
-
- input_file:
type: file
description: Input genomic coordinates file
pattern: "*.{bed,gff,vcf}"
output:
-
- index:
type: stdout,file
description:
pattern: "stdout,*.{bed,gff,vcf}"
authors:
- "@JoseEspinosa"

View file

@ -1 +0,0 @@
../../../../../tests/data/bed/A.bed

View file

@ -1,18 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
params.out_dir = "test_output"
params.fastqc_args = ''
params.publish_dir_mode = "copy"
params.bedtools_sort_args = '' //'-sizeD'
include BEDTOOLS_SORT from '../main.nf' params(params)
// Define input channels
ch_input = Channel.fromPath('./input_data/A.bed')
// Run the workflow
workflow {
BEDTOOLS_SORT(ch_input, params.bedtools_sort_args)
}

View file

@ -1,52 +0,0 @@
nextflow.preview.dsl=2
// Expected to be replaced by the including script with a map that has at least
// the keys "bowtie2" (index prefix passed to `-x`) and "name" (used in output
// file names); both keys are read in the script section below.
params.genome = ''
// Aligns single-end or paired-end reads with bowtie2 and pipes the result
// through `samtools view` into a BAM file.
//
// Inputs:
//   name, reads  - sample name and its read file(s)
//   outdir       - base directory for publishDir
//   bowtie2_args - extra bowtie2 options, prepended to the options added here
//   verbose      - when truthy, the received arguments are printed
// Outputs:
//   bam   - files matching "*bam"
//   stats - files matching "*stats.txt" (bowtie2's stderr is redirected there)
process BOWTIE2 {
// depending on the genome used one might want/need to adjust the memory settings.
// For the E. coli test data this is probably not required
// label 'bigMem'
// label 'multiCore'
publishDir "$outdir/bowtie2",
mode: "copy", overwrite: true
input:
tuple val(name), path(reads)
val (outdir)
val (bowtie2_args)
val (verbose)
output:
path "*bam", emit: bam
path "*stats.txt", emit: stats
script:
if (verbose){
println ("[MODULE] BOWTIE2 ARGS: " + bowtie2_args)
}
// Thread count handed to `bowtie2 -p`; fixed here rather than taken from the task.
cores = 4
readString = ""
// Options we add are
bowtie2_options = bowtie2_args
bowtie2_options += " --no-unal " // We don't need unaligned reads in the BAM file
// single-end / paired-end distinction. Might also be handled via params.single_end
if (reads instanceof List) {
readString = "-1 " + reads[0] + " -2 " + reads[1]
}
else {
readString = "-U " + reads
}
index = params.genome["bowtie2"]
// Prefix shared by the stats file and the BAM file: "<name>_<genome name>".
bowtie2_name = name + "_" + params.genome["name"]
// bowtie2's stderr goes to the stats file and its stdout is piped into
// `samtools view`, which reads stdin ("-") and writes the BAM via redirection.
"""
bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam
"""
}

View file

@ -1,37 +0,0 @@
name: Bowtie 2
description: Ultrafast alignment to reference genome
keywords:
- Alignment
- Short reads
- FM Index
tools:
- bowtie2:
description: |
Bowtie 2 is an ultrafast and memory-efficient tool for aligning sequencing reads
to long reference sequences. It is particularly good at aligning reads of about
50 up to 100s or 1,000s of characters, and particularly good at aligning to relatively
long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an FM Index to keep
its memory footprint small: for the human genome, its memory footprint is typically
around 3.2 GB. Bowtie 2 supports gapped, local, and paired-end alignment modes.
homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
input:
-
- sample_id:
type: string
description: Sample identifier
- reads:
type: file
description: Input FastQ file, or pair of files
output:
-
- report:
type: file
description: mapping statistics report
pattern: "*bowtie2_stats.txt"
- alignment:
type: file
description: alignment file in BAM format
pattern: "*bowtie2.bam"
authors:
- "@FelixKrueger"

View file

@ -1 +0,0 @@
../../../../../tests/data/fasta/E_coli/NC_010473.fa

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz

View file

@ -1,31 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl=2
params.outdir = "."
params.genome = ""
params.bowtie2_args = ''
// Bowtie2 arguments should be supplied in the following format to work:
// --bowtie2_args="--score-min L,0,-0.8"
params.verbose = false
if (params.verbose){
println ("[WORKFLOW] BOWTIE2 ARGS: " + params.bowtie2_args)
}
// for other genomes this needs to be handled somehow to return all possible genomes
genomeValues = ["name" : params.genome]
genomeValues["bowtie2"] = "/bi/home/fkrueger/VersionControl/nf-core-modules/test-datasets/indices/bowtie2/E_coli/${params.genome}";
include '../main.nf' params(genome: genomeValues)
ch_read_files = Channel
.fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
// .view() // to check whether the input channel works
workflow {
main:
BOWTIE2(ch_read_files, params.outdir, params.bowtie2_args, params.verbose)
}

View file

@ -1,2 +0,0 @@
docker.enabled = true
params.outdir = './results'

View file

@ -1,15 +0,0 @@
10000 reads; of these:
10000 (100.00%) were paired; of these:
893 (8.93%) aligned concordantly 0 times
8474 (84.74%) aligned concordantly exactly 1 time
633 (6.33%) aligned concordantly >1 times
----
893 pairs aligned concordantly 0 times; of these:
815 (91.27%) aligned discordantly 1 time
----
78 pairs aligned 0 times concordantly or discordantly; of these:
156 mates make up the pairs; of these:
0 (0.00%) aligned 0 times
1 (0.64%) aligned exactly 1 time
155 (99.36%) aligned >1 times
100.00% overall alignment rate

View file

@ -1,15 +0,0 @@
9979 reads; of these:
9979 (100.00%) were paired; of these:
3584 (35.92%) aligned concordantly 0 times
3705 (37.13%) aligned concordantly exactly 1 time
2690 (26.96%) aligned concordantly >1 times
----
3584 pairs aligned concordantly 0 times; of these:
886 (24.72%) aligned discordantly 1 time
----
2698 pairs aligned 0 times concordantly or discordantly; of these:
5396 mates make up the pairs; of these:
2282 (42.29%) aligned 0 times
1467 (27.19%) aligned exactly 1 time
1647 (30.52%) aligned >1 times
88.57% overall alignment rate

View file

@ -1,41 +0,0 @@
// Quality- and length-trims reads with cutadapt.
//
// Inputs:
//   sample_id, reads - sample identifier and its FastQ file(s)
// Outputs:
//   tuple of the sample identifier and the files matching "trimmed_*.fastq"
// Parameters read:
//   params.single_end           - selects the single-end or paired-end command
//   params.cutadapt_min_quality - value passed to `-q`
//   params.cutadapt_min_length  - value passed to `--minimum-length`
process cutadapt {
tag "${sample_id}"
container 'quay.io/biocontainers/cutadapt:1.16--py27_1'
input:
tuple val(sample_id), path(reads)
output:
// The identifier is declared with the explicit `val` qualifier so that it
// mirrors the input declaration above.
tuple val(sample_id), path("trimmed_*.fastq")
script:
forward_fq = "trimmed_1.fastq"
reverse_fq = "trimmed_2.fastq"
if (params.single_end) {
// Single-end: one output file only.
processing = """
cutadapt \
-j ${task.cpus} \
-q $params.cutadapt_min_quality \
--minimum-length $params.cutadapt_min_length \
--output ${forward_fq} \
${reads}
"""
} else {
// Paired-end: `--pair-filter=any` is passed, and both mates are written.
processing = """
cutadapt \
-j ${task.cpus} \
-q $params.cutadapt_min_quality \
--minimum-length $params.cutadapt_min_length \
--pair-filter=any \
--output ${forward_fq} \
--paired-output ${reverse_fq} ${reads}
"""
}
// Appended after the trimming command: writes the cutadapt version (stdout and
// stderr) to v_cutadapt.txt.
version = "cutadapt --version &> v_cutadapt.txt"
return processing + version
}

View file

@ -1,36 +0,0 @@
name: Cutadapt
description: cutadapt removes adapter sequences from high-throughput sequencing reads
keywords:
- Quality Control
- QC
- Adapters
tools:
- cutadapt:
description: |
Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence
from your high-throughput sequencing reads.
Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3'
sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads
start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but
often you don't want them to be in your reads.
homepage: https://cutadapt.readthedocs.io/en/stable/
documentation: https://cutadapt.readthedocs.io/en/stable/
input:
-
- sample_id:
type: string
description: Sample identifier
- reads:
type: file
description: Input FastQ file, or pair of files
output:
-
- sample_id:
type: string
description: Sample identifier
- reads:
type: file
description: trimmed FastQ file, or pair of files
authors:
- "@piotr-faba-ardigen"

View file

@ -1,20 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
include '../main.nf' params(params)
// Define input channels
input_fastqs = Channel.fromFilePairs('../../../test-datasets/tools/cutadapt/input/*_{1,2}.fastq' )
if(params.single_end){
input_fastqs = Channel.from([
['SRR4238351', '../../../test-datasets/tools/cutadapt/input/SRR4238351_subsamp.fastq.gz'],
['SRR4238355', '../../../test-datasets/tools/cutadapt/input/SRR4238355_subsamp.fastq.gz'],
['SRR4238359', '../../../test-datasets/tools/cutadapt/input/SRR4238359_subsamp.fastq.gz'],
['SRR4238379', '../../../test-datasets/tools/cutadapt/input/SRR4238379_subsamp.fastq.gz']
]).map { row -> [ row[0], [ file(row[1]) ] ] }
}
// Run the workflow
workflow {
cutadapt(input_fastqs)
}

View file

@ -1,9 +0,0 @@
docker.enabled = true
params.outdir = './results'
params{
// Preprocessing options
cutadapt_min_length = 40
cutadapt_min_quality = 25
single_end = false
}

View file

@ -1,58 +0,0 @@
nextflow.preview.dsl=2

// Replaced by the including script with a map; the keys "hisat2" and "name" are
// read below, and "hisat2_splices" is used when present.
params.genome = ''

// Aligns single-end or paired-end reads with HISAT2 and pipes the result through
// `samtools view` into a BAM file.
//
// Inputs:
//   name, reads - sample name and its read file(s)
//   outdir      - base directory for publishDir
//   hisat2_args - extra HISAT2 options, placed before the options added here
//   verbose     - when truthy, the received arguments are printed
// Outputs:
//   bam   - files matching "*bam"
//   stats - files matching "*stats.txt" (HISAT2's stderr is redirected there)
process HISAT2 {
    // depending on the genome used one might want/need to adjust the memory settings.
    // For the E. coli test data this is probably not required
    // label 'bigMem'
    // label 'multiCore'
    publishDir "$outdir/hisat2",
        mode: "copy", overwrite: true

    input:
    tuple val(name), path(reads)
    val outdir
    val hisat2_args
    val verbose

    output:
    path "*bam", emit: bam
    path "*stats.txt", emit: stats

    script:
    if (verbose) {
        println("[MODULE] HISAT2 ARGS: " + hisat2_args)
    }

    // Thread count handed to `hisat2 -p`.
    cores = 4

    // Caller-supplied options first, followed by the ones this module always adds.
    hisat_options = hisat2_args + " --no-unal --no-softclip "

    // A list of read files means paired-end input; anything else is single-end.
    if (reads instanceof List) {
        readString = "-1 " + reads[0] + " -2 " + reads[1]
        hisat_options += " --no-mixed --no-discordant"
    } else {
        readString = "-U " + reads
    }

    index = params.genome["hisat2"]

    // The known-splice-site file is optional: report and carry on when it is absent.
    if (!params.genome.containsKey("hisat2_splices")) {
        splices = ''
        println("No key 'hisat2_splices' was supplied. Skipping...")
    } else {
        splices = " --known-splicesite-infile " + params.genome["hisat2_splices"]
    }

    // Prefix shared by the stats file and the BAM file.
    hisat_name = name + "_" + params.genome["name"]

    """
    hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${hisat_name}_hisat2.bam
    """
}

View file

@ -1,37 +0,0 @@
name: HISAT2
description: Graph-based alignment of next generation sequencing reads to a population of genomes
keywords:
- Alignment
- Short reads
- graph FM Index (GFM)
- RNA-seq
tools:
- hisat2:
description: |
HISAT2 is a fast and sensitive alignment program for mapping next-generation
sequencing reads (whole-genome, transcriptome, and exome sequencing data)
against the general human population (as well as against a single reference genome).
Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
graph FM index (GFM).
homepage: http://daehwankimlab.github.io/hisat2/
documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
input:
-
- sample_id:
type: string
description: Sample identifier
- reads:
type: file
description: Input FastQ file, or pair of files
output:
-
- report:
type: file
description: mapping statistics report
pattern: "*hisat2_stats.txt"
- alignment:
type: file
description: alignment file in BAM format
pattern: "*hisat2.bam"
authors:
- "@FelixKrueger"

View file

@ -1 +0,0 @@
../../../../../tests/data/fasta/E_coli/NC_010473.fa

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz

View file

@ -1,34 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl=2
params.outdir = "."
params.genome = ""
params.hisat2_args = ''
// HISAT2 arguments should be supplied in the following format to work:
// --hisat2_args="--score-min L,0,-0.8"
params.verbose = false
if (params.verbose){
println ("[WORKFLOW] HISAT2 ARGS ARE: " + params.hisat2_args)
}
// for other genomes this needs to be handled somehow to return all possible genomes
genomeValues = ["name" : params.genome]
genomeValues["hisat2"] = "/bi/home/fkrueger/VersionControl/nf-core-modules/test-datasets/indices/hisat2/E_coli/${params.genome}";
include '../main.nf' params(genome: genomeValues)
ch_read_files = Channel
.fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
// .view() // to check whether the input channel works
workflow {
main:
HISAT2(ch_read_files, params.outdir, params.hisat2_args, params.verbose)
}

View file

@ -1,2 +0,0 @@
// docker.enabled = true
params.outdir = './results'

View file

@ -1,6 +0,0 @@
10000 reads; of these:
10000 (100.00%) were paired; of these:
823 (8.23%) aligned concordantly 0 times
8583 (85.83%) aligned concordantly exactly 1 time
594 (5.94%) aligned concordantly >1 times
91.77% overall alignment rate

View file

@ -1,31 +0,0 @@
nextflow.preview.dsl=2
// Runs MultiQC in the task directory over the staged input files.
//
// Inputs:
//   file         - report file(s) staged into the task directory
//   outdir       - base directory for publishDir
//   multiqc_args - extra MultiQC options, interpolated verbatim
//   verbose      - when truthy, the received arguments are printed
// Outputs:
//   html - files matching "*html"
process MULTIQC {
// tag "FastQC - $sample_id"
publishDir "${outdir}/multiqc",
mode: "copy", overwrite: true
input:
path file
val outdir
val multiqc_args
// multiqc_args are best passed into the workflow in the following manner:
// --multiqc_args="--exclude STAR --title custom_report_title"
val verbose
output:
path "*html", emit: html
script:
if (verbose){
println ("[MODULE] MULTIQC ARGS: " + multiqc_args)
}
// "." makes MultiQC scan the task directory; `-x work` is passed as an ignore
// pattern for anything named "work".
"""
multiqc $multiqc_args -x work .
"""
}

View file

@ -1,26 +0,0 @@
name: MultiQC
description: Aggregate results from bioinformatics analyses across many samples into a single report
keywords:
- QC
- bioinformatics tools
- Beautiful stand-alone HTML report
tools:
- multiqc:
description: |
MultiQC searches a given directory for analysis logs and compiles a HTML report.
It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.
homepage: https://multiqc.info/
documentation: https://multiqc.info/docs/
input:
-
- reads:
type: file
description: List of report file(s)
output:
-
- multiqc_report:
type: file
description: MultiQC report
pattern: "*multiqc*.html"
authors:
- "@FelixKrueger"

View file

@ -1 +0,0 @@
../../../../bowtie2/test/output/test_GRCm38_bowtie2_stats.txt

View file

@ -1 +0,0 @@
../../../../fastq_screen/test/output/test_R1_screen.txt

View file

@ -1 +0,0 @@
../../../../fastqc/test/output/test_R1_fastqc.zip

View file

@ -1 +0,0 @@
../../../../fastqc/test/output/test_R1_val_1_fastqc.zip

View file

@ -1 +0,0 @@
../../../../fastqc/test/output/test_R2_fastqc.zip

View file

@ -1 +0,0 @@
../../../../fastqc/test/output/test_R2_val_2_fastqc.zip

View file

@ -1 +0,0 @@
../../../../hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt

View file

@ -1 +0,0 @@
../../../../trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt

View file

@ -1 +0,0 @@
../../../../trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt

View file

@ -1,44 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl=2
params.outdir = "."
params.verbose = false
params.multiqc_args = ''
// include '../../../tests/functions/check_process_outputs.nf'
include '../main.nf'
if (params.verbose){
println ("[WORKFLOW] MULTIQC ARGS: " + params.multiqc_args)
}
multiqc_ch = Channel
.fromPath( ['../../../test-datasets/*trimming_report.txt','../../../test-datasets/*fastqc.zip','../../../test-datasets/*screen.txt','../../../test-datasets/*bowtie2_stats.txt'] )
.collect() // collect() flattens all channels to single list
// .view() // view the files in the channel
// Run the workflow
workflow {
main:
// This is an example workflow for real reads aligned with Bowtie2. Just for illustration purposes
// FASTQC (file_ch, params.outdir, params.fastqc_args, params.verbose)
// FASTQ_SCREEN (file_ch, params.outdir, params.fastq_screen_args, params.verbose)
// TRIM_GALORE (file_ch, params.outdir, params.trim_galore_args, params.verbose)
// FASTQC2 (TRIM_GALORE.out.reads, params.outdir, params.fastqc_args, params.verbose)
// BOWTIE2 (TRIM_GALORE.out.reads, params.outdir, params.bowtie2_args, params.verbose)
// merging channels for MultiQC
// multiqc_ch = FASTQC.out.report.mix(
// TRIM_GALORE.out.report,
// FASTQ_SCREEN.out.report,
// FASTQC2.out.report,
// BOWTIE2.out.stats,
// ).collect()
MULTIQC (multiqc_ch, params.outdir, params.multiqc_args, params.verbose)
// .check_output() TODO
}

View file

@ -1,2 +0,0 @@
// docker.enabled = true
params.outdir = './results'

File diff suppressed because one or more lines are too long

View file

@ -1,16 +0,0 @@
// Indexes a FASTA file with `samtools faidx`.
//
// Input:  fasta - the FASTA file to index
// Output: "<fasta>.fai", the index file named after the input
process samtools_faidx {
tag "$fasta"
container 'quay.io/biocontainers/samtools:1.9--h10a08f8_12'
input:
path fasta
output:
path "${fasta}.fai"
script:
"""
samtools faidx ${fasta}
"""
}

View file

@ -1,27 +0,0 @@
name: samtools faidx
description: index a fasta file
keywords:
- faidx
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
input:
-
- input:
type: file
description: Input fasta file
pattern: "*.{fasta,fa}"
output:
-
- faidx:
type: file
description: samtools index fasta file
pattern: "*.fasta.fai"
authors:
- "@maxulysse"

View file

@ -1,13 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
include '../../../tests/functions/check_process_outputs.nf' params(params)
include '../main.nf' params(params)
// Define input channels
input = '../../../test-datasets/tools/bwa/index/input/reference.fasta'
// Run the workflow
workflow {
samtools_faidx(input)
// .check_output()
}

View file

@ -1,2 +0,0 @@
docker.enabled = true
params.outdir = './results'

View file

@ -1,8 +0,0 @@
FROM nfcore/base:1.7
LABEL authors="chris.cheshire@crick.ac.uk" \
description="Docker image containing all requirements for the nf-core umi_tools module"
# Install conda packages
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a
ENV PATH /opt/conda/envs/nfcore-module-umitools/bin:$PATH

View file

@ -1,10 +0,0 @@
# This file creates a conda environment for the umi_tools module
# conda env create -f environment.yml
name: nfcore-module-umitools
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- umi_tools=1.0.1
- samtools=1.1.0

View file

@ -1,45 +0,0 @@
#!/usr/bin/env nextflow
// Specify DSL2
nextflow.preview.dsl = 2
// Deduplicates a BAM file with `umi_tools dedup` and indexes the result with
// `samtools index`.
//
// Inputs:
//   sample_id, bam - sample identifier and its BAM input (the first element,
//                    bam[0], is passed to `-I`)
// Outputs:
//   dedupBam - sample_id with "<sample_id>.dedup.bam"
//   dedupBai - sample_id with "<sample_id>.dedup.bam.bai"
//   report   - files matching "*.dedup.log"
// Parameters read:
//   params.outdir               - base directory for publishDir
//   params.umitools_dedup_args  - optional extra umi_tools options
//   params.verbose              - when truthy, the assembled command is printed
process umitools_dedup {
publishDir "${params.outdir}/umitools/dedup",
mode: "copy", overwrite: true
container 'luslab/nf-modules-umitools:latest'
input:
tuple val(sample_id), path(bam)
output:
tuple val(sample_id), path("${sample_id}.dedup.bam"), emit: dedupBam
tuple val(sample_id), path("${sample_id}.dedup.bam.bai"), emit: dedupBai
path "*.dedup.log", emit: report
script:
// Start the argument string with the log file option, which is always present
args = "--log=${sample_id}.dedup.log"
// Append the user-supplied arguments, if any, with surrounding whitespace stripped
if(params.umitools_dedup_args) {
ext_args = params.umitools_dedup_args
args += " " + ext_args.trim()
}
// Construct the command line
dedup_command = "umi_tools dedup ${args} -I ${bam[0]} -S ${sample_id}.dedup.bam --output-stats=${sample_id}"
// Log the assembled command when verbose output is requested
if (params.verbose){
println ("[MODULE] umi_tools/dedup command: " + dedup_command)
}
// Shell script: run the deduplication, then index the resulting BAM
"""
${dedup_command}
samtools index ${sample_id}.dedup.bam
"""
}

Some files were not shown because too many files have changed in this diff Show more