533 lines
16 KiB
Python
533 lines
16 KiB
Python
"""Numpydoc-style docstring parsing.
|
|
|
|
:see: https://numpydoc.readthedocs.io/en/latest/format.html
|
|
"""
|
|
|
|
import inspect
|
|
import itertools
|
|
import re
|
|
import typing as T
|
|
from textwrap import dedent
|
|
|
|
from .common import (
|
|
Docstring,
|
|
DocstringDeprecated,
|
|
DocstringExample,
|
|
DocstringMeta,
|
|
DocstringParam,
|
|
DocstringRaises,
|
|
DocstringReturns,
|
|
DocstringStyle,
|
|
RenderingStyle,
|
|
)
|
|
|
|
|
|
def _pairwise(iterable: T.Iterable, end=None) -> T.Iterable:
|
|
left, right = itertools.tee(iterable)
|
|
next(right, None)
|
|
return itertools.zip_longest(left, right, fillvalue=end)
|
|
|
|
|
|
def _clean_str(string: str) -> T.Optional[str]:
|
|
string = string.strip()
|
|
if len(string) > 0:
|
|
return string
|
|
return None
|
|
|
|
|
|
KV_REGEX = re.compile(r"^[^\s].*$", flags=re.M)
|
|
PARAM_KEY_REGEX = re.compile(r"^(?P<name>.*?)(?:\s*:\s*(?P<type>.*?))?$")
|
|
PARAM_OPTIONAL_REGEX = re.compile(r"(?P<type>.*?)(?:, optional|\(optional\))$")
|
|
|
|
# numpydoc format has no formal grammar for this,
|
|
# but we can make some educated guesses...
|
|
PARAM_DEFAULT_REGEX = re.compile(
|
|
r"(?<!\S)[Dd]efault(?: is | = |: |s to |)\s*(?P<value>[\w\-\.]*\w)"
|
|
)
|
|
|
|
RETURN_KEY_REGEX = re.compile(r"^(?:(?P<name>.*?)\s*:\s*)?(?P<type>.*?)$")
|
|
|
|
|
|
class Section:
|
|
"""Numpydoc section parser.
|
|
|
|
:param title: section title. For most sections, this is a heading like
|
|
"Parameters" which appears on its own line, underlined by
|
|
en-dashes ('-') on the following line.
|
|
:param key: meta key string. In the parsed ``DocstringMeta`` instance this
|
|
will be the first element of the ``args`` attribute list.
|
|
"""
|
|
|
|
def __init__(self, title: str, key: str) -> None:
|
|
self.title = title
|
|
self.key = key
|
|
|
|
@property
|
|
def title_pattern(self) -> str:
|
|
"""Regular expression pattern matching this section's header.
|
|
|
|
This pattern will match this instance's ``title`` attribute in
|
|
an anonymous group.
|
|
"""
|
|
dashes = "-" * len(self.title)
|
|
return rf"^({self.title})\s*?\n{dashes}\s*$"
|
|
|
|
def parse(self, text: str) -> T.Iterable[DocstringMeta]:
|
|
"""Parse ``DocstringMeta`` objects from the body of this section.
|
|
|
|
:param text: section body text. Should be cleaned with
|
|
``inspect.cleandoc`` before parsing.
|
|
"""
|
|
yield DocstringMeta([self.key], description=_clean_str(text))
|
|
|
|
|
|
class _KVSection(Section):
|
|
"""Base parser for numpydoc sections with key-value syntax.
|
|
|
|
E.g. sections that look like this:
|
|
key
|
|
value
|
|
key2 : type
|
|
values can also span...
|
|
... multiple lines
|
|
"""
|
|
|
|
def _parse_item(self, key: str, value: str) -> DocstringMeta:
|
|
pass
|
|
|
|
def parse(self, text: str) -> T.Iterable[DocstringMeta]:
|
|
for match, next_match in _pairwise(KV_REGEX.finditer(text)):
|
|
start = match.end()
|
|
end = next_match.start() if next_match is not None else None
|
|
value = text[start:end]
|
|
yield self._parse_item(
|
|
key=match.group(), value=inspect.cleandoc(value)
|
|
)
|
|
|
|
|
|
class _SphinxSection(Section):
|
|
"""Base parser for numpydoc sections with sphinx-style syntax.
|
|
|
|
E.g. sections that look like this:
|
|
.. title:: something
|
|
possibly over multiple lines
|
|
"""
|
|
|
|
@property
|
|
def title_pattern(self) -> str:
|
|
return rf"^\.\.\s*({self.title})\s*::"
|
|
|
|
|
|
class ParamSection(_KVSection):
|
|
"""Parser for numpydoc parameter sections.
|
|
|
|
E.g. any section that looks like this:
|
|
arg_name
|
|
arg_description
|
|
arg_2 : type, optional
|
|
descriptions can also span...
|
|
... multiple lines
|
|
"""
|
|
|
|
def _parse_item(self, key: str, value: str) -> DocstringParam:
|
|
match = PARAM_KEY_REGEX.match(key)
|
|
arg_name = type_name = is_optional = None
|
|
if match is not None:
|
|
arg_name = match.group("name")
|
|
type_name = match.group("type")
|
|
if type_name is not None:
|
|
optional_match = PARAM_OPTIONAL_REGEX.match(type_name)
|
|
if optional_match is not None:
|
|
type_name = optional_match.group("type")
|
|
is_optional = True
|
|
else:
|
|
is_optional = False
|
|
|
|
default = None
|
|
if len(value) > 0:
|
|
default_match = PARAM_DEFAULT_REGEX.search(value)
|
|
if default_match is not None:
|
|
default = default_match.group("value")
|
|
|
|
return DocstringParam(
|
|
args=[self.key, arg_name],
|
|
description=_clean_str(value),
|
|
arg_name=arg_name,
|
|
type_name=type_name,
|
|
is_optional=is_optional,
|
|
default=default,
|
|
)
|
|
|
|
|
|
class RaisesSection(_KVSection):
|
|
"""Parser for numpydoc raises sections.
|
|
|
|
E.g. any section that looks like this:
|
|
ValueError
|
|
A description of what might raise ValueError
|
|
"""
|
|
|
|
def _parse_item(self, key: str, value: str) -> DocstringRaises:
|
|
return DocstringRaises(
|
|
args=[self.key, key],
|
|
description=_clean_str(value),
|
|
type_name=key if len(key) > 0 else None,
|
|
)
|
|
|
|
|
|
class ReturnsSection(_KVSection):
|
|
"""Parser for numpydoc returns sections.
|
|
|
|
E.g. any section that looks like this:
|
|
return_name : type
|
|
A description of this returned value
|
|
another_type
|
|
Return names are optional, types are required
|
|
"""
|
|
|
|
is_generator = False
|
|
|
|
def _parse_item(self, key: str, value: str) -> DocstringReturns:
|
|
match = RETURN_KEY_REGEX.match(key)
|
|
if match is not None:
|
|
return_name = match.group("name")
|
|
type_name = match.group("type")
|
|
else:
|
|
return_name = None
|
|
type_name = None
|
|
|
|
return DocstringReturns(
|
|
args=[self.key],
|
|
description=_clean_str(value),
|
|
type_name=type_name,
|
|
is_generator=self.is_generator,
|
|
return_name=return_name,
|
|
)
|
|
|
|
|
|
class YieldsSection(ReturnsSection):
|
|
"""Parser for numpydoc generator "yields" sections."""
|
|
|
|
is_generator = True
|
|
|
|
|
|
class DeprecationSection(_SphinxSection):
|
|
"""Parser for numpydoc "deprecation warning" sections."""
|
|
|
|
def parse(self, text: str) -> T.Iterable[DocstringDeprecated]:
|
|
version, desc, *_ = text.split(sep="\n", maxsplit=1) + [None, None]
|
|
|
|
if desc is not None:
|
|
desc = _clean_str(inspect.cleandoc(desc))
|
|
|
|
yield DocstringDeprecated(
|
|
args=[self.key], description=desc, version=_clean_str(version)
|
|
)
|
|
|
|
|
|
class ExamplesSection(Section):
|
|
"""Parser for numpydoc examples sections.
|
|
|
|
E.g. any section that looks like this:
|
|
>>> import numpy.matlib
|
|
>>> np.matlib.empty((2, 2)) # filled with random data
|
|
matrix([[ 6.76425276e-320, 9.79033856e-307], # random
|
|
[ 7.39337286e-309, 3.22135945e-309]])
|
|
>>> np.matlib.empty((2, 2), dtype=int)
|
|
matrix([[ 6600475, 0], # random
|
|
[ 6586976, 22740995]])
|
|
"""
|
|
|
|
def parse(self, text: str) -> T.Iterable[DocstringMeta]:
|
|
"""Parse ``DocstringExample`` objects from the body of this section.
|
|
|
|
:param text: section body text. Should be cleaned with
|
|
``inspect.cleandoc`` before parsing.
|
|
"""
|
|
lines = dedent(text).strip().splitlines()
|
|
while lines:
|
|
snippet_lines = []
|
|
description_lines = []
|
|
while lines:
|
|
if not lines[0].startswith(">>>"):
|
|
break
|
|
snippet_lines.append(lines.pop(0))
|
|
while lines:
|
|
if lines[0].startswith(">>>"):
|
|
break
|
|
description_lines.append(lines.pop(0))
|
|
yield DocstringExample(
|
|
[self.key],
|
|
snippet="\n".join(snippet_lines) if snippet_lines else None,
|
|
description="\n".join(description_lines),
|
|
)
|
|
|
|
|
|
DEFAULT_SECTIONS = [
|
|
ParamSection("Parameters", "param"),
|
|
ParamSection("Params", "param"),
|
|
ParamSection("Arguments", "param"),
|
|
ParamSection("Args", "param"),
|
|
ParamSection("Other Parameters", "other_param"),
|
|
ParamSection("Other Params", "other_param"),
|
|
ParamSection("Other Arguments", "other_param"),
|
|
ParamSection("Other Args", "other_param"),
|
|
ParamSection("Receives", "receives"),
|
|
ParamSection("Receive", "receives"),
|
|
RaisesSection("Raises", "raises"),
|
|
RaisesSection("Raise", "raises"),
|
|
RaisesSection("Warns", "warns"),
|
|
RaisesSection("Warn", "warns"),
|
|
ParamSection("Attributes", "attribute"),
|
|
ParamSection("Attribute", "attribute"),
|
|
ReturnsSection("Returns", "returns"),
|
|
ReturnsSection("Return", "returns"),
|
|
YieldsSection("Yields", "yields"),
|
|
YieldsSection("Yield", "yields"),
|
|
ExamplesSection("Examples", "examples"),
|
|
ExamplesSection("Example", "examples"),
|
|
Section("Warnings", "warnings"),
|
|
Section("Warning", "warnings"),
|
|
Section("See Also", "see_also"),
|
|
Section("Related", "see_also"),
|
|
Section("Notes", "notes"),
|
|
Section("Note", "notes"),
|
|
Section("References", "references"),
|
|
Section("Reference", "references"),
|
|
DeprecationSection("deprecated", "deprecation"),
|
|
]
|
|
|
|
|
|
class NumpydocParser:
|
|
"""Parser for numpydoc-style docstrings."""
|
|
|
|
def __init__(self, sections: T.Optional[T.Dict[str, Section]] = None):
|
|
"""Setup sections.
|
|
|
|
:param sections: Recognized sections or None to defaults.
|
|
"""
|
|
sections = sections or DEFAULT_SECTIONS
|
|
self.sections = {s.title: s for s in sections}
|
|
self._setup()
|
|
|
|
def _setup(self):
|
|
self.titles_re = re.compile(
|
|
r"|".join(s.title_pattern for s in self.sections.values()),
|
|
flags=re.M,
|
|
)
|
|
|
|
def add_section(self, section: Section):
|
|
"""Add or replace a section.
|
|
|
|
:param section: The new section.
|
|
"""
|
|
|
|
self.sections[section.title] = section
|
|
self._setup()
|
|
|
|
def parse(self, text: str) -> Docstring:
|
|
"""Parse the numpy-style docstring into its components.
|
|
|
|
:returns: parsed docstring
|
|
"""
|
|
ret = Docstring(style=DocstringStyle.NUMPYDOC)
|
|
if not text:
|
|
return ret
|
|
|
|
# Clean according to PEP-0257
|
|
text = inspect.cleandoc(text)
|
|
|
|
# Find first title and split on its position
|
|
match = self.titles_re.search(text)
|
|
if match:
|
|
desc_chunk = text[: match.start()]
|
|
meta_chunk = text[match.start() :]
|
|
else:
|
|
desc_chunk = text
|
|
meta_chunk = ""
|
|
|
|
# Break description into short and long parts
|
|
parts = desc_chunk.split("\n", 1)
|
|
ret.short_description = parts[0] or None
|
|
if len(parts) > 1:
|
|
long_desc_chunk = parts[1] or ""
|
|
ret.blank_after_short_description = long_desc_chunk.startswith(
|
|
"\n"
|
|
)
|
|
ret.blank_after_long_description = long_desc_chunk.endswith("\n\n")
|
|
ret.long_description = long_desc_chunk.strip() or None
|
|
|
|
for match, nextmatch in _pairwise(self.titles_re.finditer(meta_chunk)):
|
|
title = next(g for g in match.groups() if g is not None)
|
|
factory = self.sections[title]
|
|
|
|
# section chunk starts after the header,
|
|
# ends at the start of the next header
|
|
start = match.end()
|
|
end = nextmatch.start() if nextmatch is not None else None
|
|
ret.meta.extend(factory.parse(meta_chunk[start:end]))
|
|
|
|
return ret
|
|
|
|
|
|
def parse(text: str) -> Docstring:
|
|
"""Parse the numpy-style docstring into its components.
|
|
|
|
:returns: parsed docstring
|
|
"""
|
|
return NumpydocParser().parse(text)
|
|
|
|
|
|
def compose(
|
|
# pylint: disable=W0613
|
|
docstring: Docstring,
|
|
rendering_style: RenderingStyle = RenderingStyle.COMPACT,
|
|
indent: str = " ",
|
|
) -> str:
|
|
"""Render a parsed docstring into docstring text.
|
|
|
|
:param docstring: parsed docstring representation
|
|
:param rendering_style: the style to render docstrings
|
|
:param indent: the characters used as indentation in the docstring string
|
|
:returns: docstring text
|
|
"""
|
|
|
|
def process_one(
|
|
one: T.Union[DocstringParam, DocstringReturns, DocstringRaises],
|
|
):
|
|
if isinstance(one, DocstringParam):
|
|
head = one.arg_name
|
|
elif isinstance(one, DocstringReturns):
|
|
head = one.return_name
|
|
else:
|
|
head = None
|
|
|
|
if one.type_name and head:
|
|
head += f" : {one.type_name}"
|
|
elif one.type_name:
|
|
head = one.type_name
|
|
elif not head:
|
|
head = ""
|
|
|
|
if isinstance(one, DocstringParam) and one.is_optional:
|
|
head += ", optional"
|
|
|
|
if one.description:
|
|
body = f"\n{indent}".join([head] + one.description.splitlines())
|
|
parts.append(body)
|
|
else:
|
|
parts.append(head)
|
|
|
|
def process_sect(name: str, args: T.List[T.Any]):
|
|
if args:
|
|
parts.append("")
|
|
parts.append(name)
|
|
parts.append("-" * len(parts[-1]))
|
|
for arg in args:
|
|
process_one(arg)
|
|
|
|
parts: T.List[str] = []
|
|
if docstring.short_description:
|
|
parts.append(docstring.short_description)
|
|
if docstring.blank_after_short_description:
|
|
parts.append("")
|
|
|
|
if docstring.deprecation:
|
|
first = ".. deprecated::"
|
|
if docstring.deprecation.version:
|
|
first += f" {docstring.deprecation.version}"
|
|
if docstring.deprecation.description:
|
|
rest = docstring.deprecation.description.splitlines()
|
|
else:
|
|
rest = []
|
|
sep = f"\n{indent}"
|
|
parts.append(sep.join([first] + rest))
|
|
|
|
if docstring.long_description:
|
|
parts.append(docstring.long_description)
|
|
if docstring.blank_after_long_description:
|
|
parts.append("")
|
|
|
|
process_sect(
|
|
"Parameters",
|
|
[item for item in docstring.params or [] if item.args[0] == "param"],
|
|
)
|
|
|
|
process_sect(
|
|
"Attributes",
|
|
[
|
|
item
|
|
for item in docstring.params or []
|
|
if item.args[0] == "attribute"
|
|
],
|
|
)
|
|
|
|
process_sect(
|
|
"Returns",
|
|
[
|
|
item
|
|
for item in docstring.many_returns or []
|
|
if not item.is_generator
|
|
],
|
|
)
|
|
|
|
process_sect(
|
|
"Yields",
|
|
[item for item in docstring.many_returns or [] if item.is_generator],
|
|
)
|
|
|
|
if docstring.returns and not docstring.many_returns:
|
|
ret = docstring.returns
|
|
parts.append("Yields" if ret else "Returns")
|
|
parts.append("-" * len(parts[-1]))
|
|
process_one(ret)
|
|
|
|
process_sect(
|
|
"Receives",
|
|
[
|
|
item
|
|
for item in docstring.params or []
|
|
if item.args[0] == "receives"
|
|
],
|
|
)
|
|
|
|
process_sect(
|
|
"Other Parameters",
|
|
[
|
|
item
|
|
for item in docstring.params or []
|
|
if item.args[0] == "other_param"
|
|
],
|
|
)
|
|
|
|
process_sect(
|
|
"Raises",
|
|
[item for item in docstring.raises or [] if item.args[0] == "raises"],
|
|
)
|
|
|
|
process_sect(
|
|
"Warns",
|
|
[item for item in docstring.raises or [] if item.args[0] == "warns"],
|
|
)
|
|
|
|
for meta in docstring.meta:
|
|
if isinstance(
|
|
meta,
|
|
(
|
|
DocstringDeprecated,
|
|
DocstringParam,
|
|
DocstringReturns,
|
|
DocstringRaises,
|
|
),
|
|
):
|
|
continue # Already handled
|
|
|
|
parts.append("")
|
|
parts.append(meta.args[0].replace("_", "").title())
|
|
parts.append("-" * len(meta.args[0]))
|
|
|
|
if meta.description:
|
|
parts.append(meta.description)
|
|
|
|
return "\n".join(parts)
|