From 97d9ae76f0e17d369ddd05833db2a821d4be0c22 Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Thu, 5 Mar 2020 15:56:11 +0000 Subject: [PATCH 01/12] add cutadapt module --- tools/cutadapt/main.nf | 24 ++++++++++++++++++++++++ tools/cutadapt/meta.yml | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 tools/cutadapt/main.nf create mode 100644 tools/cutadapt/meta.yml diff --git a/tools/cutadapt/main.nf b/tools/cutadapt/main.nf new file mode 100644 index 00000000..c647140b --- /dev/null +++ b/tools/cutadapt/main.nf @@ -0,0 +1,24 @@ +process cutadapt { + tag "${sample_id}" + + container 'quay.io/biocontainers/cutadapt:1.16--py27_1' + + input: + tuple sample_id, file(input_forward_fq), file(input_reverse_fq) + + output: + tuple sample_id, file(output_forward_fq), file(output_reverse_fq) + + script: + """ + cutadapt \ + -j ${task.cpus} \ + -q $params.cutadapt_min_quality \ + --minimum-length $params.cutadapt_min_length \ + --pair-filter=any \ + --output ${forward_fq} \ + --paired-output ${reverse_fq} '$input_forward_fq' '$input_reverse_fq' + + cutadapt --version &> v_cutadapt.txt + """ +} diff --git a/tools/cutadapt/meta.yml b/tools/cutadapt/meta.yml new file mode 100644 index 00000000..e4e1deb6 --- /dev/null +++ b/tools/cutadapt/meta.yml @@ -0,0 +1,40 @@ +name: Cutadapt +description: cutadapt removes adapter sequences from high-throughput sequencing reads +keywords: + - Quality Control + - QC + - Adapters +tools: + - fastqc: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence + from your high-throughput sequencing reads. + + Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ + sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads + start with a primer sequence. 
Poly-A tails are useful for pulling out RNA from your sample, but + often you don’t want them to be in your reads. + homepage: https://cutadapt.readthedocs.io/en/stable/ + documentation: https://cutadapt.readthedocs.io/en/stable/ +input: + - - sample_id: + type: string + description: Sample identifier + - input_forward_fq: + type: file + description: Input FastQ forward read file of the pair + - input_reverse_fq: + type: file + description: Input FastQ reverse read file of the pair +output: + - - sample_id: + type: string + description: Sample identifier + - output_forward_fq: + type: file + description: Output FastQ forward read file of the pair + - output_reverse_fq: + type: file + description: Output FastQ reverse read file of the pair +authors: + - @piotr-faba-ardigen From 88bc7dcdd8b261549baff4029126e04d85390a7d Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 10:51:46 +0000 Subject: [PATCH 02/12] the module is working in test --- test-datasets | 2 +- tools/cutadapt/main.nf | 7 +++++-- tools/cutadapt/test/main.nf | 22 ++++++++++++++++++++++ tools/cutadapt/test/nextflow.config | 8 ++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 tools/cutadapt/test/main.nf create mode 100644 tools/cutadapt/test/nextflow.config diff --git a/test-datasets b/test-datasets index e5fef889..be215c08 160000 --- a/test-datasets +++ b/test-datasets @@ -1 +1 @@ -Subproject commit e5fef88994b8d34c7bf4b07116e5f7a330d2ee3b +Subproject commit be215c0874e2485f0d8e12a6f0addf08f2cd08df diff --git a/tools/cutadapt/main.nf b/tools/cutadapt/main.nf index c647140b..432eccb4 100644 --- a/tools/cutadapt/main.nf +++ b/tools/cutadapt/main.nf @@ -7,9 +7,12 @@ process cutadapt { tuple sample_id, file(input_forward_fq), file(input_reverse_fq) output: - tuple sample_id, file(output_forward_fq), file(output_reverse_fq) + tuple sample_id, file(forward_fq), file(reverse_fq) script: + forward_fq = "trimmed_forward.fastq" + reverse_fq = "trimmed_reverse.fastq" + """ 
cutadapt \ -j ${task.cpus} \ @@ -17,7 +20,7 @@ process cutadapt { --minimum-length $params.cutadapt_min_length \ --pair-filter=any \ --output ${forward_fq} \ - --paired-output ${reverse_fq} '$input_forward_fq' '$input_reverse_fq' + --paired-output ${reverse_fq} '${input_forward_fq}' '${input_reverse_fq}' cutadapt --version &> v_cutadapt.txt """ diff --git a/tools/cutadapt/test/main.nf b/tools/cutadapt/test/main.nf new file mode 100644 index 00000000..69c1eef8 --- /dev/null +++ b/tools/cutadapt/test/main.nf @@ -0,0 +1,22 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../../../nf-core/module_testing/check_process_outputs.nf' params(params) +include '../main.nf' params(params) + +// Define input channels +readPaths = [ + [ sample: 'SRR4238351', + R1: '../../../test-datasets/tools/cutadapt/input/SRR396636.sra_1.fastq', + R2: '../../../test-datasets/tools/cutadapt/input/SRR396636.sra_2.fastq' + ] +] +Channel + .from(readPaths) + .map { row -> tuple( row.sample_name, file(row.R1.trim()), file(row.R2.trim()) ) } + .set { ch_read_files } + +// Run the workflow +workflow { + cutadapt(ch_read_files) + // .check_output() +} diff --git a/tools/cutadapt/test/nextflow.config b/tools/cutadapt/test/nextflow.config new file mode 100644 index 00000000..b1c505a7 --- /dev/null +++ b/tools/cutadapt/test/nextflow.config @@ -0,0 +1,8 @@ +docker.enabled = true +params.outdir = './results' + +params{ + //preprocessing options + cutadapt_min_length = 40 + cutadapt_min_quality = 25 +} From 490eca706c3d2ecc52395d977e5fe29145eba60c Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 13:23:12 +0000 Subject: [PATCH 03/12] single and paired end supported --- tools/cutadapt/main.nf | 42 +++++++++++++------ tools/cutadapt/test/main.nf | 22 ---------- tools/cutadapt/test_paired/main.nf | 14 +++++++ tools/cutadapt/test_paired/nextflow.config | 9 ++++ tools/cutadapt/test_single/main.nf | 21 ++++++++++ .../{test => test_single}/nextflow.config | 1 + 6 files changed, 75 
insertions(+), 34 deletions(-) delete mode 100644 tools/cutadapt/test/main.nf create mode 100644 tools/cutadapt/test_paired/main.nf create mode 100644 tools/cutadapt/test_paired/nextflow.config create mode 100644 tools/cutadapt/test_single/main.nf rename tools/cutadapt/{test => test_single}/nextflow.config (87%) diff --git a/tools/cutadapt/main.nf b/tools/cutadapt/main.nf index 432eccb4..bdd444af 100644 --- a/tools/cutadapt/main.nf +++ b/tools/cutadapt/main.nf @@ -4,24 +4,42 @@ process cutadapt { container 'quay.io/biocontainers/cutadapt:1.16--py27_1' input: - tuple sample_id, file(input_forward_fq), file(input_reverse_fq) + tuple val(sample_id), file(reads) output: - tuple sample_id, file(forward_fq), file(reverse_fq) + tuple sample_id, file("trimmed_*.fastq") script: - forward_fq = "trimmed_forward.fastq" - reverse_fq = "trimmed_reverse.fastq" + forward_fq = "trimmed_1.fastq" + reverse_fq = "trimmed_2.fastq" - """ - cutadapt \ - -j ${task.cpus} \ - -q $params.cutadapt_min_quality \ - --minimum-length $params.cutadapt_min_length \ - --pair-filter=any \ - --output ${forward_fq} \ - --paired-output ${reverse_fq} '${input_forward_fq}' '${input_reverse_fq}' + if (params.singleEnd) { + processing = """ + cutadapt \ + -j ${task.cpus} \ + -q $params.cutadapt_min_quality \ + --minimum-length $params.cutadapt_min_length \ + --output ${forward_fq} \ + ${reads} + """ + } else { + processing = """ + cutadapt \ + -j ${task.cpus} \ + -q $params.cutadapt_min_quality \ + --minimum-length $params.cutadapt_min_length \ + --pair-filter=any \ + --output ${forward_fq} \ + --paired-output ${reverse_fq} ${reads} + + + """ + } + + version = """ cutadapt --version &> v_cutadapt.txt """ + + return processing + version } diff --git a/tools/cutadapt/test/main.nf b/tools/cutadapt/test/main.nf deleted file mode 100644 index 69c1eef8..00000000 --- a/tools/cutadapt/test/main.nf +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.preview.dsl = 2 -include 
'../../../nf-core/module_testing/check_process_outputs.nf' params(params) -include '../main.nf' params(params) - -// Define input channels -readPaths = [ - [ sample: 'SRR4238351', - R1: '../../../test-datasets/tools/cutadapt/input/SRR396636.sra_1.fastq', - R2: '../../../test-datasets/tools/cutadapt/input/SRR396636.sra_2.fastq' - ] -] -Channel - .from(readPaths) - .map { row -> tuple( row.sample_name, file(row.R1.trim()), file(row.R2.trim()) ) } - .set { ch_read_files } - -// Run the workflow -workflow { - cutadapt(ch_read_files) - // .check_output() -} diff --git a/tools/cutadapt/test_paired/main.nf b/tools/cutadapt/test_paired/main.nf new file mode 100644 index 00000000..46d0fdc6 --- /dev/null +++ b/tools/cutadapt/test_paired/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../main.nf' params(params) + +// Define input channels + +Channel + .fromFilePairs('../../../test-datasets/tools/cutadapt/input/*_{1,2}.fastq' ) + .set { ch_read_files } + +// Run the workflow +workflow { + cutadapt(ch_read_files) +} diff --git a/tools/cutadapt/test_paired/nextflow.config b/tools/cutadapt/test_paired/nextflow.config new file mode 100644 index 00000000..08e52203 --- /dev/null +++ b/tools/cutadapt/test_paired/nextflow.config @@ -0,0 +1,9 @@ +docker.enabled = true +params.outdir = './results' + +params{ + //preprocessing options + cutadapt_min_length = 40 + cutadapt_min_quality = 25 + singleEnd = false +} diff --git a/tools/cutadapt/test_single/main.nf b/tools/cutadapt/test_single/main.nf new file mode 100644 index 00000000..96947ad0 --- /dev/null +++ b/tools/cutadapt/test_single/main.nf @@ -0,0 +1,21 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../main.nf' params(params) + +// Define input channels + +readPaths = [ + ['SRR4238351', '../../../test-datasets/tools/fastqc/input/SRR4238351_subsamp.fastq.gz'], + ['SRR4238355', '../../../test-datasets/tools/fastqc/input/SRR4238355_subsamp.fastq.gz'], + ['SRR4238359', 
'../../../test-datasets/tools/fastqc/input/SRR4238359_subsamp.fastq.gz'], + ['SRR4238379', '../../../test-datasets/tools/fastqc/input/SRR4238379_subsamp.fastq.gz'] +] +Channel + .from(readPaths) + .map { row -> [ row[0], [ file(row[1]) ] ] } + .set { ch_read_files } + +// Run the workflow +workflow { + cutadapt(ch_read_files) +} diff --git a/tools/cutadapt/test/nextflow.config b/tools/cutadapt/test_single/nextflow.config similarity index 87% rename from tools/cutadapt/test/nextflow.config rename to tools/cutadapt/test_single/nextflow.config index b1c505a7..4b805ff3 100644 --- a/tools/cutadapt/test/nextflow.config +++ b/tools/cutadapt/test_single/nextflow.config @@ -5,4 +5,5 @@ params{ //preprocessing options cutadapt_min_length = 40 cutadapt_min_quality = 25 + singleEnd = true } From 1c941ef8d667a7d078f9f4e606c6358b689beb7d Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 13:59:14 +0000 Subject: [PATCH 04/12] Update test-data commit and references --- test-datasets | 2 +- tools/cutadapt/test_single/main.nf | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test-datasets b/test-datasets index be215c08..aae85a5c 160000 --- a/test-datasets +++ b/test-datasets @@ -1 +1 @@ -Subproject commit be215c0874e2485f0d8e12a6f0addf08f2cd08df +Subproject commit aae85a5c9c72238959108212481ce83bae569709 diff --git a/tools/cutadapt/test_single/main.nf b/tools/cutadapt/test_single/main.nf index 96947ad0..657e2428 100644 --- a/tools/cutadapt/test_single/main.nf +++ b/tools/cutadapt/test_single/main.nf @@ -5,10 +5,10 @@ include '../main.nf' params(params) // Define input channels readPaths = [ - ['SRR4238351', '../../../test-datasets/tools/fastqc/input/SRR4238351_subsamp.fastq.gz'], - ['SRR4238355', '../../../test-datasets/tools/fastqc/input/SRR4238355_subsamp.fastq.gz'], - ['SRR4238359', '../../../test-datasets/tools/fastqc/input/SRR4238359_subsamp.fastq.gz'], - ['SRR4238379', 
'../../../test-datasets/tools/fastqc/input/SRR4238379_subsamp.fastq.gz'] + ['SRR4238351', '../../../test-datasets/tools/cutadapt/input/SRR4238351_subsamp.fastq.gz'], + ['SRR4238355', '../../../test-datasets/tools/cutadapt/input/SRR4238355_subsamp.fastq.gz'], + ['SRR4238359', '../../../test-datasets/tools/cutadapt/input/SRR4238359_subsamp.fastq.gz'], + ['SRR4238379', '../../../test-datasets/tools/cutadapt/input/SRR4238379_subsamp.fastq.gz'] ] Channel .from(readPaths) From c90474202c3ff4b381ca184be6cd2a7b5914365a Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:23:32 +0000 Subject: [PATCH 05/12] update meta --- tools/cutadapt/meta.yml | 62 +++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/tools/cutadapt/meta.yml b/tools/cutadapt/meta.yml index e4e1deb6..8df0b244 100644 --- a/tools/cutadapt/meta.yml +++ b/tools/cutadapt/meta.yml @@ -1,40 +1,36 @@ name: Cutadapt description: cutadapt removes adapter sequences from high-throughput sequencing reads keywords: - - Quality Control - - QC - - Adapters + - Quality Control + - QC + - Adapters tools: - - fastqc: - description: | - Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence - from your high-throughput sequencing reads. - - Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ - sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads - start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but - often you don’t want them to be in your reads. - homepage: https://cutadapt.readthedocs.io/en/stable/ - documentation: https://cutadapt.readthedocs.io/en/stable/ + - cutadapt: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence + from your high-throughput sequencing reads. 
+ + Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ + sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads + start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but + often you don’t want them to be in your reads. + homepage: https://cutadapt.readthedocs.io/en/stable/ + documentation: https://cutadapt.readthedocs.io/en/stable/ input: - - - sample_id: - type: string - description: Sample identifier - - input_forward_fq: - type: file - description: Input FastQ forward read file of the pair - - input_reverse_fq: - type: file - description: Input FastQ reverse read file of the pair + - + - sample_id: + type: string + description: Sample identifier + - reads: + type: file + description: Input FastQ file, or pair of files output: - - - sample_id: - type: string - description: Sample identifier - - output_forward_fq: - type: file - description: Output FastQ forward read file of the pair - - output_reverse_fq: - type: file - description: Output FastQ reverse read file of the pair + - + - sample_id: + type: string + description: Sample identifier + - reads: + type: file + description: Trimmed FastQ file, or pair of files authors: - - @piotr-faba-ardigen + - "@piotr-faba-ardigen" From f33dd2d2e9f322535dffd572079969e6862f7a1c Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:31:44 +0000 Subject: [PATCH 06/12] add ci test --- .github/workflows/cutadapt.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/cutadapt.yml diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml new file mode 100644 index 00000000..c7e5d8b1 --- /dev/null +++ b/.github/workflows/cutadapt.yml @@ -0,0 +1,22 @@ +name: cutadapt +on: + push: {} + pull_request: + paths: tools/cutadapt/* + +jobs: + run_ci_test: + runs-on: ubuntu-latest + + steps: + # Check out the repository + - uses: 
actions/checkout@v1 + submodules: true + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + # Test the module + - run: nextflow run ./tools/cutadapt/sort/test_paired/ -ansi-log false From 977a9b3ee8c6a425ae6521d0774bcff33f7baad9 Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:34:34 +0000 Subject: [PATCH 07/12] test fix --- .github/workflows/cutadapt.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml index c7e5d8b1..3058bd23 100644 --- a/.github/workflows/cutadapt.yml +++ b/.github/workflows/cutadapt.yml @@ -10,8 +10,13 @@ jobs: steps: # Check out the repository - - uses: actions/checkout@v1 - submodules: true + - uses: actions/checkout@v2 + - name: Checkout submodules + shell: bash + run: | + auth_header="$(git config --local --get http.https://github.com/.extraheader)" + git submodule sync --recursive + git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 - name: Install Nextflow run: | From 36cad5059347e8d420e034aecf10387fdfba9b79 Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:38:49 +0000 Subject: [PATCH 08/12] test submodules checkout --- .github/workflows/cutadapt.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml index 3058bd23..81c734ad 100644 --- a/.github/workflows/cutadapt.yml +++ b/.github/workflows/cutadapt.yml @@ -10,13 +10,8 @@ jobs: steps: # Check out the repository - - uses: actions/checkout@v2 - - name: Checkout submodules - shell: bash - run: | - auth_header="$(git config --local --get http.https://github.com/.extraheader)" - git submodule sync --recursive - git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 + - uses: actions/checkout@v1 + 
submodules: true - name: Install Nextflow run: | @@ -24,4 +19,4 @@ jobs: sudo mv nextflow /usr/local/bin/ # Test the module - - run: nextflow run ./tools/cutadapt/sort/test_paired/ -ansi-log false + - run: nextflow run ./tools/cutadapt/sort/test_paired/main.nf -ansi-log false From 660be047083e39bcc36c7e65d8401c7a66b6e8fd Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:41:37 +0000 Subject: [PATCH 09/12] fix test --- .github/workflows/cutadapt.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml index 81c734ad..0aa773d8 100644 --- a/.github/workflows/cutadapt.yml +++ b/.github/workflows/cutadapt.yml @@ -10,8 +10,13 @@ jobs: steps: # Check out the repository - - uses: actions/checkout@v1 - submodules: true + - uses: actions/checkout@v2 + - name: Checkout submodules + shell: bash + run: | + auth_header="$(git config --local --get http.https://github.com/.extraheader)" + git submodule sync --recursive + git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 - name: Install Nextflow run: | From 2646e55e2b1fac534c47b94d0ccc3efdcf1f5072 Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:50:29 +0000 Subject: [PATCH 10/12] get test working --- .github/workflows/cutadapt.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml index 0aa773d8..9d9f5a2c 100644 --- a/.github/workflows/cutadapt.yml +++ b/.github/workflows/cutadapt.yml @@ -24,4 +24,6 @@ jobs: sudo mv nextflow /usr/local/bin/ # Test the module - - run: nextflow run ./tools/cutadapt/sort/test_paired/main.nf -ansi-log false + - run: | + cd tools/cutadapt/sort/test_paired/ + nextflow run . 
-ansi-log false From fa134a1196d92a883d2f1a0cffce805482cb6dfb Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:54:26 +0000 Subject: [PATCH 11/12] fix path error --- .github/workflows/cutadapt.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml index 9d9f5a2c..c30f4ff3 100644 --- a/.github/workflows/cutadapt.yml +++ b/.github/workflows/cutadapt.yml @@ -25,5 +25,5 @@ jobs: # Test the module - run: | - cd tools/cutadapt/sort/test_paired/ + cd tools/cutadapt/test_paired/ nextflow run . -ansi-log false From 6fd86ad9e5ae26c608e80472ec8f69e2bad93ed6 Mon Sep 17 00:00:00 2001 From: Piotr Faba Date: Fri, 6 Mar 2020 14:56:33 +0000 Subject: [PATCH 12/12] 2 tests --- .github/workflows/cutadapt.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml index c30f4ff3..ba92681d 100644 --- a/.github/workflows/cutadapt.yml +++ b/.github/workflows/cutadapt.yml @@ -23,7 +23,12 @@ jobs: wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ - # Test the module - - run: | + - name: Test module with paired-end data + run: | cd tools/cutadapt/test_paired/ nextflow run . -ansi-log false + + - name: Test module with single-end data + run: | + cd tools/cutadapt/test_single/ + nextflow run . -ansi-log false