nfdocs-parser/nfdocs-parser.py

#!/usr/bin/env python
import sys
import yaml
from docutils import nodes

# Prefix that marks a documentation line. Lines are matched with
# str.startswith, so the trailing space is significant: a line must begin
# with exactly "/// " to be treated as part of a docstring block.
DOC_STARTER = "/// "

def definition_type(signature):
    """Identify the kind and name of the definition opened by ``signature``.

    The line must begin (after optional leading whitespace) with one of the
    keywords ``workflow``, ``process`` or ``function`` as a whole word. Only
    that leading keyword is removed when extracting the name, so names that
    happen to contain a keyword (``preprocess``, ``run_workflow``) are
    returned intact and are not mistaken for another definition type.

    Args:
        signature: The source line that follows a docstring block, e.g.
            ``"process FASTQC {\\n"``.

    Returns:
        A ``(name, type)`` tuple where ``type`` is ``"workflow"``,
        ``"process"`` or ``"function"``. The name is an empty string for an
        anonymous definition such as ``workflow {``. If no keyword is
        recognized, ``("unknown", "an error occurred")`` is returned.
    """
    # NOTE(review): only the literal keyword "function" is recognized for
    # functions, as before; confirm whether "def" should be accepted as well.
    stripped = signature.strip()

    for def_type in ("workflow", "process", "function"):
        if not stripped.startswith(def_type):
            continue

        remainder = stripped[len(def_type):]

        # Require a whole-word match so that e.g. "processing = 1" or
        # "workflows {" are not treated as definitions.
        if remainder and not (remainder[0].isspace() or remainder[0] == "{"):
            continue

        # Parse out the definition name: everything after the keyword, minus
        # the opening brace and surrounding whitespace.
        def_name = remainder.replace("{", "").strip()
        return def_name, def_type

    # No signature was recognized
    return "unknown", "an error occurred"

def params_to_list(params):
    """Convert one parsed input/output description into a docutils list item.

    Args:
        params: A mapping describing a single input or output. If it has a
            ``"tuple"`` key, that value is iterated and each element is
            converted recursively into a nested bullet list. Otherwise the
            mapping must provide ``"type"`` and ``"description"``. An
            optional ``"name"`` is rendered first in both cases.

    Returns:
        A ``nodes.list_item`` containing paragraphs for the name (if any)
        and either a ``"Tuple:"`` label followed by a nested bullet list, or
        the ``"Type: ..."`` and description paragraphs.

    Raises:
        KeyError: If a non-tuple entry lacks ``"type"`` or ``"description"``.
    """
    item = nodes.list_item()

    # The optional name always comes first, for tuples and plain entries alike
    if "name" in params:
        item += nodes.paragraph(text=params["name"])

    if "tuple" in params:
        item += nodes.paragraph(text="Tuple:")
        members = nodes.bullet_list()
        for member in params["tuple"]:
            # Tuple members may themselves be tuples, hence the recursion
            members += params_to_list(member)
        item += members
        return item

    item += nodes.paragraph(text=f"Type: {params['type']}")
    item += nodes.paragraph(text=params["description"])
    return item

# Take path as single argument for now
nextflow_path = sys.argv[1]
with open(nextflow_path) as nextflow_file:

    # Split by lines
    nextflow_lines = nextflow_file.readlines()

    # Keep track of where the current docstring block begins and ends
    doc_start = 0
    doc_end = 0
    last_index = len(nextflow_lines) - 1

    # One range of line indices per docstring block
    docstring_positions = []

    # Calculate the start and end positions of each docstring
    for i, line in enumerate(nextflow_lines):
        # Only docstring lines can open or close a block
        if not line.startswith(DOC_STARTER):
            continue

        # A block opens on the first line of the file or right after a
        # non-docstring line. The explicit i == 0 check avoids index -1
        # silently wrapping around to the last line of the file.
        if i == 0 or not nextflow_lines[i-1].startswith(DOC_STARTER):
            doc_start = i

        # A block closes on the last line of the file or right before a
        # non-docstring line. Recording the block here, rather than comparing
        # against doc_end afterwards, keeps the initial doc_end value of 0
        # from registering a bogus block at line 0.
        if i == last_index or not nextflow_lines[i+1].startswith(DOC_STARTER):
            doc_end = i
            docstring_positions.append(range(doc_start, doc_end+1))

    # Create dictionaries for each of the block types
    docstrings = {
        "process": {},
        "workflow": {},
        "function": {}
    }

    # Parse out the docstrings and put them in the appropriate dictionary
    for pos in docstring_positions:
        signature_index = pos[-1] + 1

        # A docstring block that runs to the end of the file has no
        # definition following it, so there is nothing to attach it to.
        if signature_index > last_index:
            print(
                f"{nextflow_path}:{pos[0] + 1}: docstring is not followed by a definition",
                file=sys.stderr
            )
            continue

        proc_name, proc_type = definition_type(nextflow_lines[signature_index])

        # definition_type reports unrecognized signatures with a type that is
        # not one of the dictionary keys; skip those instead of crashing.
        if proc_type not in docstrings:
            print(
                f"{nextflow_path}:{signature_index + 1}: unrecognized definition after docstring",
                file=sys.stderr
            )
            continue

        # Strip only the leading marker so that any "/// " inside the
        # documentation text itself is preserved.
        doc_yaml = "".join(nextflow_lines[i][len(DOC_STARTER):] for i in pos)
        docstrings[proc_type][proc_name] = yaml.safe_load(doc_yaml)

    # Display the results so far
    print(docstrings)
Add nfdocs-parser.py Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:15:29 +00:00			`#!/usr/bin/env python`
Add argument parsing to nfdocs-parser.py Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:16:43 +00:00			`import sys`
Add docstring parsing per process Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:09:31 +00:00			`import yaml`
Add regular input/output parsing to parameter parsing function Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 22:22:44 +00:00			`from docutils import nodes`
Add argument parsing to nfdocs-parser.py Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:16:43 +00:00
Add finding positions of docstrings Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:36:10 +00:00			`# Declare the docstring starting characters`
			`DOC_STARTER = "/// "`

Add definition_type function to parse out function names and types Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:52:14 +00:00			`def definition_type(signature):`
			`# Returns "name", workflow\|process\|function`
			`def_type = "unknown"`
			`if "workflow" in signature:`
			`def_type = "workflow"`
			`elif "process" in signature:`
			`def_type = "process"`
			`elif "function" in signature:`
			`def_type = "function"`

			`# Check if any signature was recognized`
			`if def_type == "unknown":`
			`return "unknown", "an error occurred"`

			`# Parse out the definition name`
			`def_name = signature.replace(def_type, "").replace("{", "").strip()`

			`# Return the results`
			`return def_name, def_type`

Add a function that can parse recursive tuples Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 21:19:47 +00:00			`def params_to_list(params):`
			`if "tuple" in params.keys():`
Add named output parsing to tuple parsing function Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 22:21:21 +00:00			`tuple_item = nodes.list_item()`
			`if "name" in params.keys():`
			`tuple_item += nodes.paragraph(text=params["name"])`
			`tuple_item += nodes.paragraph(text="Tuple:")`
Add a function that can parse recursive tuples Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 21:19:47 +00:00			`tuple_list = nodes.bullet_list()`
			`for io in params["tuple"]:`
			`tuple_list += params_to_list(io)`
			`tuple_item += tuple_list`
			`return tuple_item`
			`else:`
Add regular input/output parsing to parameter parsing function Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 22:22:44 +00:00			`print(params)`
			`io_item = nodes.list_item()`
			`if "name" in params.keys():`
			`io_item += nodes.paragraph(text=params["name"])`
			`io_item += nodes.paragraph(text=f"Type: {params['type']}")`
			`io_item += nodes.paragraph(text=params["description"])`
			`return io_item`
Add a function that can parse recursive tuples Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 21:19:47 +00:00
Add comments to parser Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:20:35 +00:00			`# Take path as single argument for now`
Add file reading to parser Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:17:47 +00:00			`nextflow_path = sys.argv[1]`
			`with open(nextflow_path) as nextflow_file:`
Add comments to parser Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:20:35 +00:00
			`# Split by lines`
Add line extraction to parser Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:19:33 +00:00			`nextflow_lines = nextflow_file.readlines()`
Add comments to parser Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:20:35 +00:00
Add finding positions of docstrings Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:36:10 +00:00			`# Declare some variables to keep track of where the docstrings begin and end`
			`doc_start = 0`
			`doc_end = 0`

			`# Declare dictionaries to keep track of the docstrings`
			`docstring_positions = []`

			`# Calculate the start and end positions of each docstring`
			`for i, line in enumerate(nextflow_lines):`
			`# Check if this is a docstring`
			`if line.startswith(DOC_STARTER):`
			`# It is: check the next and previous lines to see if this is part of a block`
			`line_previous = nextflow_lines[i-1]`
			`line_next = nextflow_lines[i+1]`
			`if not line_previous.startswith(DOC_STARTER):`
			`doc_start = i`
			`if not line_next.startswith(DOC_STARTER):`
			`doc_end = i`

			`# Check if we've reached the end of a docstring block`
			`if doc_end == i:`
			`# Add this docstring position to the array`
Fix indexing error cutting off last line of docstrings Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 22:20:24 +00:00			`docstring_positions.append(range(doc_start, doc_end+1))`
Add finding positions of docstrings Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:36:10 +00:00
Add docstring parsing per process Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:09:31 +00:00			`# Create dictionaries for each of the block types`
Make docstring dictionary more generic Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:22:48 +00:00			`docstrings = {`
			`"process": {},`
			`"workflow": {},`
			`"function": {}`
			`}`
Add docstring parsing per process Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:09:31 +00:00
			`# Parse out the docstrings and put them in the appropriate dictionary`
			`for pos in docstring_positions:`
Fix indexing error cutting off last line of docstrings Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 22:20:24 +00:00			`proc_name, proc_type = definition_type(nextflow_lines[pos[-1]+1])`
Add docstring parsing per process Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:09:31 +00:00			`doc_yaml = ""`
			`for i in pos:`
			`doc_yaml = doc_yaml + nextflow_lines[i].replace(DOC_STARTER, "")`
Make docstring dictionary more generic Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:22:48 +00:00			`docstrings[proc_type][proc_name] = yaml.safe_load(doc_yaml)`
Add docstring parsing per process Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:09:31 +00:00
Add finding positions of docstrings Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 19:36:10 +00:00			`# Display the results so far`
Make docstring dictionary more generic Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> 2022-01-05 20:22:48 +00:00			`print(docstrings)`