Merge pull request #25 from piotr-faba-ardigen/add_cutadapt_tool

Add cutadapt tool
2025-01-02 20:52:07 -05:00 · 2020-03-16 18:46:00 +01:00 · 2020-03-16 18:46:00 +01:00 · 9dee710b04
commit 9dee710b04
parent 7738dbe4df 6fd86ad9e5
8 changed files with 169 additions and 1 deletions
--- a/.github/workflows/cutadapt.yml
+++ b/.github/workflows/cutadapt.yml
@ -0,0 +1,34 @@
 # CI for the cutadapt module: installs Nextflow and runs the paired-end and
 # single-end test pipelines under tools/cutadapt/.
 name: cutadapt
 on:
  # No branch or path filter on push.
  push: {}
  pull_request:
    # '**' also matches files in nested directories (test_paired/, test_single/);
    # a single '*' does not cross '/' and would ignore changes to the tests.
    paths:
      - 'tools/cutadapt/**'
 jobs:
  run_ci_test:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository
      - uses: actions/checkout@v2
      # Fetch submodules, reusing the auth header that the checkout step
      # stored in the local git config.
      - name: Checkout submodules
        shell: bash
        run: |
          auth_header="$(git config --local --get http.https://github.com/.extraheader)"
          git submodule sync --recursive
          git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      - name: Test module with paired-end data
        run: |
          cd tools/cutadapt/test_paired/
          nextflow run . -ansi-log false
      - name: Test module with single-end data
        run: |
          cd tools/cutadapt/test_single/
          nextflow run . -ansi-log false
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit e5fef88994b8d34c7bf4b07116e5f7a330d2ee3b
+Subproject commit aae85a5c9c72238959108212481ce83bae569709
--- a/tools/cutadapt/main.nf
+++ b/tools/cutadapt/main.nf
@ -0,0 +1,45 @@
 // Quality- and length-trim reads with cutadapt.
 //
 // Input:  tuple of a sample identifier and the read file(s) for that sample.
 // Output: tuple of the same sample identifier and the trimmed_*.fastq file(s).
 // Params: params.singleEnd            - selects the single-end command when true
 //         params.cutadapt_min_quality - value passed to -q
 //         params.cutadapt_min_length  - value passed to --minimum-length
 // Also writes the tool version to v_cutadapt.txt in the task work directory.
 process cutadapt {
    tag "${sample_id}"
    container 'quay.io/biocontainers/cutadapt:1.16--py27_1'
    input:
    tuple val(sample_id), file(reads)
    output:
    // Explicit val() qualifier for the identifier; the glob collects
    // trimmed_1.fastq and, for paired-end runs, trimmed_2.fastq.
    tuple val(sample_id), file("trimmed_*.fastq")
    script:
    // Declared with `def` so every task invocation gets its own local
    // variables instead of assigning to shared, undeclared names.
    def forward_fq = "trimmed_1.fastq"
    def reverse_fq = "trimmed_2.fastq"
    def processing
    if (params.singleEnd) {
        processing = """
            cutadapt \
                -j ${task.cpus} \
                -q ${params.cutadapt_min_quality} \
                --minimum-length ${params.cutadapt_min_length} \
                --output ${forward_fq} \
                ${reads}
            """
    } else {
        // --pair-filter=any: the filter applies when either read of a pair matches it.
        processing = """
            cutadapt \
                -j ${task.cpus} \
                -q ${params.cutadapt_min_quality} \
                --minimum-length ${params.cutadapt_min_length} \
                --pair-filter=any \
                --output ${forward_fq} \
                --paired-output ${reverse_fq} ${reads}
            """
    }
    def version = """
    cutadapt --version &> v_cutadapt.txt
    """
    // The final script is the trimming command followed by the version capture.
    return processing + version
 }
--- a/tools/cutadapt/meta.yml
+++ b/tools/cutadapt/meta.yml
@ -0,0 +1,36 @@
 # Metadata for the cutadapt module: the wrapped tool plus the input and
 # output tuples of the process.
 name: Cutadapt
 description: cutadapt removes adapter sequences from high-throughput sequencing reads
 keywords:
    - Quality Control
    - QC
    - Adapters
 tools:
    # Keyed by the tool this module actually wraps.
    - cutadapt:
        description: |
            Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence
            from your high-throughput sequencing reads.
            Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’
            sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads
            start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but
            often you don’t want them to be in your reads.
        homepage: https://cutadapt.readthedocs.io/en/stable/
        documentation: https://cutadapt.readthedocs.io/en/stable/
 input:
    -
        - sample_id:
            type: string
            description: Sample identifier
        - reads:
            type: file
            description: Input FastQ file, or pair of files
 output:
    -
        - sample_id:
            type: string
            description: Sample identifier
        - reads:
            type: file
            description: trimmed FastQ file, or pair of files
 authors:
    # Quoted: a plain scalar may not start with the reserved '@' indicator.
    - '@piotr-faba-ardigen'
--- a/tools/cutadapt/test_paired/main.nf
+++ b/tools/cutadapt/test_paired/main.nf
@ -0,0 +1,14 @@
 #!/usr/bin/env nextflow
 nextflow.preview.dsl = 2
 include '../main.nf' params(params)
 // Paired-end test inputs: files matching *_1.fastq / *_2.fastq are grouped
 // into one pair per shared prefix.
 ch_read_files = Channel.fromFilePairs('../../../test-datasets/tools/cutadapt/input/*_{1,2}.fastq')
 // Feed every read pair through the cutadapt process
 workflow {
    cutadapt(ch_read_files)
 }
--- a/tools/cutadapt/test_paired/nextflow.config
+++ b/tools/cutadapt/test_paired/nextflow.config
@ -0,0 +1,9 @@
 // Configuration for the paired-end cutadapt test run.
 docker {
    enabled = true
 }
 params {
    outdir = './results'
    // preprocessing options
    cutadapt_min_length = 40
    cutadapt_min_quality = 25
    singleEnd = false
 }
--- a/tools/cutadapt/test_single/main.nf
+++ b/tools/cutadapt/test_single/main.nf
@ -0,0 +1,21 @@
 #!/usr/bin/env nextflow
 nextflow.preview.dsl = 2
 include '../main.nf' params(params)
 // Single-end test inputs, one [sample id, FastQ path] row per sample
 readPaths = [
  ['SRR4238351', '../../../test-datasets/tools/cutadapt/input/SRR4238351_subsamp.fastq.gz'],
  ['SRR4238355', '../../../test-datasets/tools/cutadapt/input/SRR4238355_subsamp.fastq.gz'],
  ['SRR4238359', '../../../test-datasets/tools/cutadapt/input/SRR4238359_subsamp.fastq.gz'],
  ['SRR4238379', '../../../test-datasets/tools/cutadapt/input/SRR4238379_subsamp.fastq.gz']
 ]
 // Turn each row into [sample id, [file]] so it has the same shape as the
 // tuples the cutadapt process takes as input.
 ch_read_files = Channel
  .from(readPaths)
  .map { sample_name, fastq_path -> [ sample_name, [ file(fastq_path) ] ] }
 // Feed every sample through the cutadapt process
 workflow {
    cutadapt(ch_read_files)
 }
--- a/tools/cutadapt/test_single/nextflow.config
+++ b/tools/cutadapt/test_single/nextflow.config
@ -0,0 +1,9 @@
 // Configuration for the single-end cutadapt test run.
 docker {
    enabled = true
 }
 params {
    outdir = './results'
    // preprocessing options
    cutadapt_min_length = 40
    cutadapt_min_quality = 25
    singleEnd = true
 }
		`@ -1 +1 @@`
			`Subproject commit e5fef88994b8d34c7bf4b07116e5f7a330d2ee3b`				`Subproject commit aae85a5c9c72238959108212481ce83bae569709`