youtube-summarizer/venv311/lib/python3.11/site-packages/docutils/parsers/rst/directives/misc.py

# $Id: misc.py 10126 2025-05-13 08:37:56Z milde $
# Authors: David Goodger <goodger@python.org>; Dethe Elza
# Copyright: This module has been placed in the public domain.

"""Miscellaneous directives."""

from __future__ import annotations

__docformat__ = 'reStructuredText'

import re
import time
from pathlib import Path
from urllib.request import urlopen
from urllib.error import URLError

from docutils import frontend, io, nodes, statemachine, utils
from docutils.parsers.rst import Directive, convert_directive_function
from docutils.parsers.rst import directives, roles, states
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
from docutils.transforms import misc

TYPE_CHECKING = False
if TYPE_CHECKING:
    from docutils.nodes import StrPath


def adapt_path(path: str, source='', root_prefix='') -> str:
    # Adapt path to files to include or embed.
    # `root_prefix` is prepended to absolute paths (cf. root_prefix setting),
    # `source` is the `current_source` of the including directive (which may
    # be a file included by the main document).
    if root_prefix and path.startswith('/'):
        base = Path(root_prefix)
        path = path[1:]
    else:
        base = Path(source).parent
    # pepend "base" and convert to relative path for shorter system messages
    return utils.relative_path(None, base/path)


class Include(Directive):

    """
    Include content read from a separate source file.

    Content may be parsed by the parser, or included as a literal
    block.  The encoding of the included file can be specified.  Only
    a part of the given file argument may be included by specifying
    start and end line or text to match before and/or after the text
    to be used.

    https://docutils.sourceforge.io/docs/ref/rst/directives.html#include
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'literal': directives.flag,
                   'code': directives.unchanged,
                   'encoding': directives.encoding,
                   'parser': directives.parser_name,
                   'tab-width': int,
                   'start-line': int,
                   'end-line': int,
                   'start-after': directives.unchanged_required,
                   'end-before': directives.unchanged_required,
                   # ignored except for 'literal' or 'code':
                   'number-lines': directives.value_or((None,), int),
                   'class': directives.class_option,
                   'name': directives.unchanged}

    standard_include_path = Path(states.__file__).parent / 'include'

    def run(self) -> list:
        """Include a file as part of the content of this reST file.

        Depending on the options, the file content (or a clipping) is
        converted to nodes and returned or inserted into the input stream.
        """
        self.settings = settings = self.state.document.settings
        if not settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        self.tab_width = self.options.get('tab-width', settings.tab_width)
        self.clip_options = (self.options.get('start-line', None),
                             self.options.get('end-line', None),
                             self.options.get('start-after', ''),
                             self.options.get('end-before', ''))
        path = directives.path(self.arguments[0])
        if path.startswith('<') and path.endswith('>'):
            path = '/' + path[1:-1]
            root_prefix = self.standard_include_path
        else:
            root_prefix = settings.root_prefix
        path = adapt_path(path,
                          self.state.document.current_source,
                          root_prefix)
        self.options['source'] = path

        inputstring = self.read_file(path)

        if 'literal' in self.options:
            return self.as_literal_block(inputstring)
        if 'code' in self.options:
            return self.as_code_block(inputstring)
        if 'parser' in self.options:
            return self.custom_parse(inputstring)
        self.insert_into_input_lines(inputstring)
        return []

    def read_file(self, path: StrPath) -> str:
        """Read text file at `path`. Clip and return content.

        Provisional.
        """
        encoding = self.options.get('encoding', self.settings.input_encoding)
        error_handler = self.settings.input_encoding_error_handler
        try:
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=error_handler)
        except UnicodeEncodeError:
            raise self.severe(f'Problems with "{self.name}" directive path:\n'
                              f'Cannot encode input file path "{path}" '
                              '(wrong locale?).')
        except OSError as error:
            raise self.severe(f'Problems with "{self.name}" directive path:\n'
                              f'{io.error_string(error)}.')
        else:
            self.settings.record_dependencies.add(path)
        try:
            text = include_file.read()
        except UnicodeError as error:
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + io.error_string(error))
        # Clip to-be-included content
        startline, endline, starttext, endtext = self.clip_options
        if startline or (endline is not None):
            lines = text.splitlines()
            text = '\n'.join(lines[startline:endline])
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        if starttext:
            # skip content in text before *and incl.* a matching text
            after_index = text.find(starttext)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of '
                                  f'"{self.name}" directive:\nText not found.')
            text = text[after_index + len(starttext):]
        if endtext:
            # skip content in text after *and incl.* a matching text
            before_index = text.find(endtext)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of '
                                  f'"{self.name}" directive:\nText not found.')
            text = text[:before_index]
        return text

    def as_literal_block(self, text: str) -> list[nodes.literal_block]:
        """Return list with literal_block containing `text`.

        Provisional
        """
        source = self.options['source']
        # Convert tabs to spaces unless `tab_width` is negative.
        if self.tab_width >= 0:
            text = text.expandtabs(self.tab_width)
        literal_block = nodes.literal_block(
            '', source=source, classes=self.options.get('class', []))
        literal_block.source = source
        literal_block.line = self.options.get('start-line', 0) + 1
        self.add_name(literal_block)
        if 'number-lines' in self.options:
            firstline = self.options['number-lines'] or 1
            text = text.removesuffix('\n')
            lastline = firstline + len(text.splitlines())
            tokens = NumberLines([([], text)], firstline, lastline)
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline('', value, classes=classes)
                else:
                    literal_block += nodes.Text(value)
        else:
            literal_block += nodes.Text(text)
        return [literal_block]

    def as_code_block(self, text: str) -> list[nodes.literal_block]:
        """Pass `text` to the `CodeBlock` directive class.

        Provisional.
        """
        # convert tabs to spaces unless `tab_width` is negative:
        if self.tab_width >= 0:
            text = text.expandtabs(self.tab_width)
        codeblock = CodeBlock(self.name,
                              [self.options.pop('code')],  # pass as argument
                              self.options,
                              [text.removesuffix('\n')],   # content
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine,
                              )
        return codeblock.run()

    def custom_parse(self, text: str) -> list:
        """Parse with custom parser.

        Parse with ``self.options['parser']`` into a new (dummy) document,
        apply the parser's default transforms,
        return child elements.

        Provisional.
        """
        parser = self.options['parser']()
        settings = frontend.get_default_settings(parser)
        # update with current document settings
        for k, v in self.settings.__dict__.items():
            setattr(settings, k, v)
        settings._source = self.options['source']
        document = utils.new_document(settings._source, settings)
        document.include_log = self.state.document.include_log
        document.ids = self.state.document.ids
        document.nameids = self.state.document.nameids
        document.nametypes = self.state.document.nametypes
        parser.parse(text, document)
        self.state.document.parse_messages.extend(document.parse_messages)
        # clean up doctree and complete parsing
        document.transformer.populate_from_components((parser,))
        document.transformer.apply_transforms()
        self.state.document.transform_messages.extend(
            document.transform_messages)
        return document.children

    def insert_into_input_lines(self, text: str) -> None:
        """Insert file content into the rST input of the calling parser.

        Returns an empty list to comply with the API of `Directive.run()`.

        Provisional.
        """
        source = self.options['source']
        textlines = statemachine.string2lines(text, self.tab_width,
                                              convert_whitespace=True)
        # Sanity checks:
        # excessively long lines
        for i, line in enumerate(textlines):
            if len(line) > self.settings.line_length_limit:
                line_no = i + 1 + self.options.get('start-line', 0)
                raise self.warning(f'"{source}": line {line_no} exceeds the'
                                   ' line-length-limit.')
        # circular inclusion
        include_log = self.state.document.include_log
        if not include_log:  # new document, initialize with document source
            current_source = utils.relative_path(
                                None, self.state.document.current_source)
            include_log.append((current_source, (None, None, '', '')))
        if (source, self.clip_options) in include_log:
            source_chain = (pth for (pth, opt) in reversed(include_log))
            inclusion_chain = '\n> '.join((source, *source_chain))
            raise self.warning(f'circular inclusion in "{self.name}"'
                               f' directive:\n{inclusion_chain}')
        include_log.append((source, self.clip_options))
        # marker for removing log entry (cf. parsers.rst.states.Body.comment())
        textlines += ['', f'.. end of inclusion from "{source}"']

        self.state_machine.insert_input(textlines, source)
        # TODO: if startline != 0, line numbers are wrong.


class Raw(Directive):

    """
    Pass through content unchanged

    Content is included in output based on type argument

    Content may be included inline (content section of directive) or
    imported from a file or url.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'file': directives.path,
                   'url': directives.uri,
                   'encoding': directives.encoding,
                   'class': directives.class_option}
    has_content = True

    def run(self):
        settings = self.state.document.settings
        if (not settings.raw_enabled
            or (not settings.file_insertion_enabled
                and ('file' in self.options or 'url' in self.options))):
            raise self.warning('"%s" directive disabled.' % self.name)
        attributes = {'format': ' '.join(self.arguments[0].lower().split())}
        encoding = self.options.get('encoding', settings.input_encoding)
        error_handler = settings.input_encoding_error_handler
        if self.content:
            if 'file' in self.options or 'url' in self.options:
                raise self.error(
                    '"%s" directive may not both specify an external file '
                    'and have content.' % self.name)
            text = '\n'.join(self.content)
        elif 'file' in self.options:
            if 'url' in self.options:
                raise self.error(
                    'The "file" and "url" options may not be simultaneously '
                    'specified for the "%s" directive.' % self.name)
            path = adapt_path(self.options['file'],
                              self.state.document.current_source,
                              settings.root_prefix)
            try:
                raw_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=error_handler)
            except OSError as error:
                raise self.severe(f'Problems with "{self.name}" directive '
                                  f'path:\n{io.error_string(error)}.')
            else:
                # TODO: currently, raw input files are recorded as
                # dependencies even if not used for the chosen output format.
                settings.record_dependencies.add(path)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(f'Problem with "{self.name}" directive:\n'
                                  + io.error_string(error))
            attributes['source'] = path
        elif 'url' in self.options:
            source = self.options['url']
            try:
                raw_text = urlopen(source).read()
            except (URLError, OSError) as error:
                raise self.severe(f'Problems with "{self.name}" directive URL '
                                  f'"{self.options["url"]}":\n'
                                  f'{io.error_string(error)}.')
            raw_file = io.StringInput(source=raw_text, source_path=source,
                                      encoding=encoding,
                                      error_handler=error_handler)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(f'Problem with "{self.name}" directive:\n'
                                  + io.error_string(error))
            attributes['source'] = source
        else:
            # This will always fail because there is no content.
            self.assert_has_content()
        raw_node = nodes.raw('', text, classes=self.options.get('class', []),
                             **attributes)
        (raw_node.source,
         raw_node.line) = self.state_machine.get_source_and_line(self.lineno)
        return [raw_node]


class Replace(Directive):

    has_content = True

    def run(self):
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        self.assert_has_content()
        text = '\n'.join(self.content)
        element = nodes.Element(text)
        self.state.nested_parse(self.content, self.content_offset,
                                element)
        # element might contain [paragraph] + system_message(s)
        node = None
        messages = []
        for elem in element:
            if not node and isinstance(elem, nodes.paragraph):
                node = elem
            elif isinstance(elem, nodes.system_message):
                elem['backrefs'] = []
                messages.append(elem)
            else:
                return [
                    self.reporter.error(
                        f'Error in "{self.name}" directive: may contain '
                        'a single paragraph only.', line=self.lineno)]
        if node:
            return messages + node.children
        return messages


class Unicode(Directive):

    r"""
    Convert Unicode character codes (numbers) to characters.  Codes may be
    decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``,
    ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character
    entities (e.g. ``&#x262E;``).  Text following ".." is a comment and is
    ignored.  Spaces are ignored, and any other text remains as-is.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'trim': directives.flag,
                   'ltrim': directives.flag,
                   'rtrim': directives.flag}

    comment_pattern = re.compile(r'( |\n|^)\.\. ')

    def run(self):
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        substitution_definition = self.state_machine.node
        if 'trim' in self.options:
            substitution_definition.attributes['ltrim'] = 1
            substitution_definition.attributes['rtrim'] = 1
        if 'ltrim' in self.options:
            substitution_definition.attributes['ltrim'] = 1
        if 'rtrim' in self.options:
            substitution_definition.attributes['rtrim'] = 1
        codes = self.comment_pattern.split(self.arguments[0])[0].split()
        element = nodes.Element()
        for code in codes:
            try:
                decoded = directives.unicode_code(code)
            except ValueError as error:
                raise self.error('Invalid character code: %s\n%s'
                                 % (code, io.error_string(error)))
            element += nodes.Text(decoded)
        return element.children


class Class(Directive):

    """
    Set a "class" attribute on the directive content or the next element.
    When applied to the next element, a "pending" element is inserted, and a
    transform does the work later.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        try:
            class_value = directives.class_option(self.arguments[0])
        except ValueError:
            raise self.error(
                'Invalid class attribute value for "%s" directive: "%s".'
                % (self.name, self.arguments[0]))
        node_list = []
        if self.content:
            container = nodes.Element()
            self.state.nested_parse(self.content, self.content_offset,
                                    container)
            for node in container:
                node['classes'].extend(class_value)
            node_list.extend(container.children)
        else:
            pending = nodes.pending(
                misc.ClassAttribute,
                {'class': class_value, 'directive': self.name},
                self.block_text)
            self.state_machine.document.note_pending(pending)
            node_list.append(pending)
        return node_list


class Role(Directive):

    has_content = True

    argument_pattern = re.compile(r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$'
                                  % ((states.Inliner.simplename,) * 2))

    def run(self):
        """Dynamically create and register a custom interpreted text role."""
        if self.content_offset > self.lineno or not self.content:
            raise self.error('"%s" directive requires arguments on the first '
                             'line.' % self.name)
        args = self.content[0]
        match = self.argument_pattern.match(args)
        if not match:
            raise self.error('"%s" directive arguments not valid role names: '
                             '"%s".' % (self.name, args))
        new_role_name = match.group(1)
        base_role_name = match.group(3)
        messages = []
        if base_role_name:
            base_role, messages = roles.role(
                base_role_name, self.state_machine.language, self.lineno,
                self.state.reporter)
            if base_role is None:
                error = self.state.reporter.error(
                    'Unknown interpreted text role "%s".' % base_role_name,
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [error]
        else:
            base_role = roles.generic_custom_role
        assert not hasattr(base_role, 'arguments'), (
            'Supplemental directive arguments for "%s" directive not '
            'supported (specified by "%r" role).' % (self.name, base_role))
        try:
            converted_role = convert_directive_function(base_role)
            (arguments, options, content, content_offset
             ) = self.state.parse_directive_block(
                    self.content[1:], self.content_offset,
                    converted_role, option_presets={})
        except states.MarkupError as detail:
            error = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (self.name, detail),
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [error]
        if 'class' not in options:
            try:
                options['class'] = directives.class_option(new_role_name)
            except ValueError as detail:
                error = self.reporter.error(
                    'Invalid argument for "%s" directive:\n%s.'
                    % (self.name, detail),
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [error]
        role = roles.CustomRole(new_role_name, base_role, options, content)
        roles.register_local_role(new_role_name, role)
        return messages


class DefaultRole(Directive):

    """Set the default interpreted text role."""

    optional_arguments = 1
    final_argument_whitespace = False

    def run(self):
        if not self.arguments:
            if '' in roles._roles:
                # restore the "default" default role
                del roles._roles['']
            return []
        role_name = self.arguments[0]
        role, messages = roles.role(role_name, self.state_machine.language,
                                    self.lineno, self.state.reporter)
        if role is None:
            error = self.state.reporter.error(
                'Unknown interpreted text role "%s".' % role_name,
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [error]
        roles._roles[''] = role
        return messages


class Title(Directive):

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True

    def run(self):
        self.state_machine.document['title'] = self.arguments[0]
        return []


class MetaBody(states.SpecializedBody):

    def field_marker(self, match, context, next_state):
        """Meta element."""
        node, blank_finish = self.parsemeta(match)
        self.parent += node
        return [], next_state, []

    def parsemeta(self, match):
        name = self.parse_field_marker(match)
        name = nodes.unescape(utils.escape2null(name))
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        node = nodes.meta()
        node['content'] = nodes.unescape(utils.escape2null(
                                            ' '.join(indented)))
        if not indented:
            line = self.state_machine.line
            msg = self.reporter.info(
                  'No content for meta tag "%s".' % name,
                  nodes.literal_block(line, line))
            return msg, blank_finish
        tokens = name.split()
        try:
            attname, val = utils.extract_name_value(tokens[0])[0]
            node[attname.lower()] = val
        except utils.NameValueError:
            node['name'] = tokens[0]
        for token in tokens[1:]:
            try:
                attname, val = utils.extract_name_value(token)[0]
                node[attname.lower()] = val
            except utils.NameValueError as detail:
                line = self.state_machine.line
                msg = self.reporter.error(
                      'Error parsing meta tag attribute "%s": %s.'
                      % (token, detail), nodes.literal_block(line, line))
                return msg, blank_finish
        return node, blank_finish


class Meta(Directive):

    has_content = True

    SMkwargs = {'state_classes': (MetaBody,)}

    def run(self):
        self.assert_has_content()
        node = nodes.Element()
        new_line_offset, blank_finish = self.state.nested_list_parse(
            self.content, self.content_offset, node,
            initial_state='MetaBody', blank_finish=True,
            state_machine_kwargs=self.SMkwargs)
        if (new_line_offset - self.content_offset) != len(self.content):
            # incomplete parse of block?
            error = self.reporter.error(
                'Invalid meta directive.',
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            node += error
        # insert at begin of document
        index = self.state.document.first_child_not_matching_class(
                                        (nodes.Titular, nodes.meta)) or 0
        self.state.document[index:index] = node.children
        return []


class Date(Directive):

    has_content = True

    def run(self):
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        format_str = '\n'.join(self.content) or '%Y-%m-%d'
        # @@@
        # Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable?
        # Pro: Docutils-generated documentation
        #      can easily be part of `reproducible software builds`__
        #
        #      __ https://reproducible-builds.org/
        #
        # Con: Changes the specs, hard to predict behaviour,
        #
        # See also the discussion about \date \time \year in TeX
        # http://tug.org/pipermail/tex-k/2016-May/002704.html
        # source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
        # if (source_date_epoch):
        #     text = time.strftime(format_str,
        #                          time.gmtime(int(source_date_epoch)))
        # else:
        text = time.strftime(format_str)
        return [nodes.Text(text)]


class TestDirective(Directive):

    """This directive is useful only for testing purposes."""

    optional_arguments = 1
    final_argument_whitespace = True
    option_spec = {'option': directives.unchanged_required}
    has_content = True

    def run(self):
        if self.content:
            text = '\n'.join(self.content)
            info = self.reporter.info(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content:' % (self.name, self.arguments, self.options),
                nodes.literal_block(text, text), line=self.lineno)
        else:
            info = self.reporter.info(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content: None' % (self.name, self.arguments, self.options),
                line=self.lineno)
        return [info]