bots-as-digital-infrapunctures/bots-venv/lib/python3.7/site-packages/pelican/contents.py


								import copy

								import datetime

								import locale

								import logging

								import os

								import re

								from urllib.parse import urljoin, urlparse, urlunparse


								import pytz


								from pelican.plugins import signals

								from pelican.settings import DEFAULT_CONFIG

								from pelican.utils import (deprecated_attribute, memoized, path_to_url,

								                           posixize_path, sanitised_join, set_date_tzinfo,

								                           slugify, truncate_html_words)


								# Import these so that they're available when you import from pelican.contents.

								from pelican.urlwrappers import (Author, Category, Tag, URLWrapper)  # NOQA


								# Module-level logger named after this module; used for every warning
								# and error reported by the classes below.
								logger = logging.getLogger(__name__)


								class Content:

								    """Represents a content.


								    :param content: the string to parse, containing the original content.

								    :param metadata: the metadata associated to this page (optional).

								    :param settings: the settings dictionary (optional).

								    :param source_path: The location of the source of this content (if any).

								    :param context: The shared context between generators.


								    """

								    # Deprecated alias: 'filename' was renamed to 'source_path' in 3.2.0.
								    # NOTE(review): presumably deprecated_attribute redirects access to
								    # the new name and warns -- confirm in pelican.utils.
								    @deprecated_attribute(old='filename', new='source_path', since=(3, 2, 0))
								    def filename():
								        return None


								    def __init__(self, content, metadata=None, settings=None,
								                 source_path=None, context=None):
								        """Build the content object and derive its default attributes.

								        Every metadata entry is set as an attribute, then defaults are
								        derived for template, author(s), language, slug, date format,
								        timezone, dates and status. The ``content_object_init`` signal is
								        sent with this object as the last step.

								        :param content: the string to parse, containing the original
								                        content.
								        :param metadata: mapping of metadata for this content; an empty
								                         dict is used when None.
								        :param settings: the settings dictionary; a deep copy of
								                         DEFAULT_CONFIG is used when None.
								        :param source_path: location of the source of this content, if any.
								        :param context: context shared between generators; an empty dict
								                        is used when None.
								        """
								        if metadata is None:
								            metadata = {}
								        if settings is None:
								            settings = copy.deepcopy(DEFAULT_CONFIG)

								        self.settings = settings
								        self._content = content
								        if context is None:
								            context = {}
								        self._context = context
								        self.translations = []

								        # work on a shallow copy so the caller's mapping is not the one
								        # stored on (and later modified through) this object
								        local_metadata = dict()
								        local_metadata.update(metadata)

								        # set metadata as attributes
								        for key, value in local_metadata.items():
								            # 'save_as' and 'url' are stored as override_* attributes,
								            # which get_url_setting() checks before expanding a setting
								            if key in ('save_as', 'url'):
								                key = 'override_' + key
								            # goes through the properties of this class where they exist
								            # (the status setter lower-cases, the summary setter ignores)
								            setattr(self, key.lower(), value)

								        # also keep track of the metadata attributes available
								        self.metadata = local_metadata

								        # default template if it's not defined in page
								        self.template = self._get_template()

								        # When no 'author' was given, use the first entry of 'authors';
								        # failing that, the AUTHOR setting, if any.
								        if not hasattr(self, 'author'):
								            if hasattr(self, 'authors'):
								                self.author = self.authors[0]
								            elif 'AUTHOR' in settings:
								                self.author = Author(settings['AUTHOR'], settings)

								        # conversely, a lone 'author' becomes a one-element 'authors' list
								        if not hasattr(self, 'authors') and hasattr(self, 'author'):
								            self.authors = [self.author]

								        # XXX Split all the following code into pieces, there is too much here.

								        # manage languages
								        self.in_default_lang = True
								        if 'DEFAULT_LANG' in settings:
								            default_lang = settings['DEFAULT_LANG'].lower()
								            if not hasattr(self, 'lang'):
								                self.lang = default_lang

								            self.in_default_lang = (self.lang == default_lang)

								        # create the slug if not existing, generate slug according to
								        # setting of SLUGIFY_SOURCE ('title' or 'basename'); no slug is
								        # set when the chosen source is unavailable
								        if not hasattr(self, 'slug'):
								            if (settings['SLUGIFY_SOURCE'] == 'title' and
								                    hasattr(self, 'title')):
								                value = self.title
								            elif (settings['SLUGIFY_SOURCE'] == 'basename' and
								                    source_path is not None):
								                value = os.path.basename(os.path.splitext(source_path)[0])
								            else:
								                value = None
								            if value is not None:
								                self.slug = slugify(
								                    value,
								                    regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []),
								                    preserve_case=settings.get('SLUGIFY_PRESERVE_CASE', False),
								                    use_unicode=settings.get('SLUGIFY_USE_UNICODE', False))

								        self.source_path = source_path
								        self.relative_source_path = self.get_relative_source_path()

								        # manage the date format: per-language entry of DATE_FORMATS when
								        # there is one, DEFAULT_DATE_FORMAT otherwise
								        if not hasattr(self, 'date_format'):
								            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
								                self.date_format = settings['DATE_FORMATS'][self.lang]
								            else:
								                self.date_format = settings['DEFAULT_DATE_FORMAT']

								        # a tuple date format is (locale, format): the locale is applied
								        # to the whole process (LC_ALL) and only the format is kept
								        if isinstance(self.date_format, tuple):
								            locale_string = self.date_format[0]
								            locale.setlocale(locale.LC_ALL, locale_string)
								            self.date_format = self.date_format[1]

								        # manage timezone: a 'timezone' attribute set from metadata wins
								        # over the TIMEZONE setting, which itself defaults to 'UTC'
								        default_timezone = settings.get('TIMEZONE', 'UTC')
								        timezone = getattr(self, 'timezone', default_timezone)
								        self.timezone = pytz.timezone(timezone)

								        # set_date_tzinfo() receives the timezone name, not the pytz object
								        if hasattr(self, 'date'):
								            self.date = set_date_tzinfo(self.date, timezone)
								            self.locale_date = self.date.strftime(self.date_format)

								        if hasattr(self, 'modified'):
								            self.modified = set_date_tzinfo(self.modified, timezone)
								            self.locale_modified = self.modified.strftime(self.date_format)

								        # manage status
								        if not hasattr(self, 'status'):
								            # Previous default of None broke comment plugins and perhaps others
								            self.status = getattr(self, 'default_status', '')

								        # store the summary metadata if it is set
								        if 'summary' in metadata:
								            self._summary = metadata['summary']

								        signals.content_object_init.send(self)


								    def __str__(self):

								        return self.source_path or repr(self)


								    def _has_valid_mandatory_properties(self):

								        """Test mandatory properties are set."""

								        for prop in self.mandatory_properties:

								            if not hasattr(self, prop):

								                logger.error(

								                    "Skipping %s: could not find information about '%s'",

								                    self, prop)

								                return False

								        return True


								    def _has_valid_save_as(self):

								        """Return true if save_as doesn't write outside output path, false

								        otherwise."""

								        try:

								            output_path = self.settings["OUTPUT_PATH"]

								        except KeyError:

								            # we cannot check

								            return True


								        try:

								            sanitised_join(output_path, self.save_as)

								        except RuntimeError:  # outside output_dir

								            logger.error(

								                "Skipping %s: file %r would be written outside output path",

								                self,

								                self.save_as,

								            )

								            return False


								        return True


								    def _has_valid_status(self):

								        if hasattr(self, 'allowed_statuses'):

								            if self.status not in self.allowed_statuses:

								                logger.error(

								                    "Unknown status '%s' for file %s, skipping it.",

								                    self.status,

								                    self

								                )

								                return False


								        # if undefined we allow all

								        return True


								    def is_valid(self):

								        """Validate Content"""

								        # Use all() to not short circuit and get results of all validations

								        return all([self._has_valid_mandatory_properties(),

								                    self._has_valid_save_as(),

								                    self._has_valid_status()])


								    @property
								    def url_format(self):
								        """Return the values used to fill in URL and save_as patterns.

								        The result is a shallow copy of the metadata in which ``path``,
								        ``slug``, ``lang``, ``date``, ``author`` and ``category`` are
								        always present, falling back to defaults for missing attributes.
								        """
								        values = copy.copy(self.metadata)
								        source = self.metadata.get('path', self.get_relative_source_path())
								        values['path'] = path_to_url(source)
								        values['slug'] = getattr(self, 'slug', '')
								        values['lang'] = getattr(self, 'lang', 'en')
								        values['date'] = getattr(self, 'date', datetime.datetime.now())
								        if hasattr(self, 'author'):
								            values['author'] = self.author.slug
								        else:
								            values['author'] = ''
								        if hasattr(self, 'category'):
								            values['category'] = self.category.slug
								        else:
								            values['category'] = ''
								        return values


								    def _expand_settings(self, key, klass=None):

								        if not klass:

								            klass = self.__class__.__name__

								        fq_key = ('{}_{}'.format(klass, key)).upper()

								        return self.settings[fq_key].format(**self.url_format)


								    def get_url_setting(self, key):

								        if hasattr(self, 'override_' + key):

								            return getattr(self, 'override_' + key)

								        key = key if self.in_default_lang else 'lang_%s' % key

								        return self._expand_settings(key)


								    def _link_replacer(self, siteurl, m):

								        what = m.group('what')

								        value = urlparse(m.group('value'))

								        path = value.path

								        origin = m.group('path')


								        # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")

								        # so if RELATIVE_URLS are enabled, we fall back to os.path.join() to

								        # properly get `../a.html`. However, os.path.join() produces

								        # `baz/http://foo/bar.html` for join("baz", "http://foo/bar.html")

								        # instead of correct "http://foo/bar.html", so one has to pick a side

								        # as there is no silver bullet.

								        if self.settings['RELATIVE_URLS']:

								            joiner = os.path.join

								        else:

								            joiner = urljoin


								            # However, it's not *that* simple: urljoin("blog", "index.html")

								            # produces just `index.html` instead of `blog/index.html` (unlike

								            # os.path.join()), so in order to get a correct answer one needs to

								            # append a trailing slash to siteurl in that case. This also makes

								            # the new behavior fully compatible with Pelican 3.7.1.

								            if not siteurl.endswith('/'):

								                siteurl += '/'


								        # XXX Put this in a different location.

								        if what in {'filename', 'static', 'attach'}:

								            if path.startswith('/'):

								                path = path[1:]

								            else:

								                # relative to the source path of this content

								                path = self.get_relative_source_path(

								                    os.path.join(self.relative_dir, path)

								                )


								            key = 'static_content' if what in ('static', 'attach')\

								                else 'generated_content'


								            def _get_linked_content(key, path):

								                try:

								                    return self._context[key][path]

								                except KeyError:

								                    try:

								                        # Markdown escapes spaces, try unescaping

								                        return self._context[key][path.replace('%20', ' ')]

								                    except KeyError:

								                        if what == 'filename' and key == 'generated_content':

								                            key = 'static_content'

								                            linked_content = _get_linked_content(key, path)

								                            if linked_content:

								                                logger.warning(

								                                    '{filename} used for linking to static'

								                                    ' content %s in %s. Use {static} instead',

								                                    path,

								                                    self.get_relative_source_path())

								                                return linked_content

								                        return None


								            linked_content = _get_linked_content(key, path)

								            if linked_content:

								                if what == 'attach':

								                    linked_content.attach_to(self)

								                origin = joiner(siteurl, linked_content.url)

								                origin = origin.replace('\\', '/')  # for Windows paths.

								            else:

								                logger.warning(

								                    "Unable to find '%s', skipping url replacement.",

								                    value.geturl(), extra={

								                        'limit_msg': ("Other resources were not found "

								                                      "and their urls not replaced")})

								        elif what == 'category':

								            origin = joiner(siteurl, Category(path, self.settings).url)

								        elif what == 'tag':

								            origin = joiner(siteurl, Tag(path, self.settings).url)

								        elif what == 'index':

								            origin = joiner(siteurl, self.settings['INDEX_SAVE_AS'])

								        elif what == 'author':

								            origin = joiner(siteurl, Author(path, self.settings).url)

								        else:

								            logger.warning(

								                "Replacement Indicator '%s' not recognized, "

								                "skipping replacement",

								                what)


								        # keep all other parts, such as query, fragment, etc.

								        parts = list(value)

								        parts[2] = origin

								        origin = urlunparse(parts)


								        return ''.join((m.group('markup'), m.group('quote'), origin,

								                        m.group('quote')))


								    def _get_intrasite_link_regex(self):
								        """Compile the verbose regex that finds intrasite links in markup.

								        It matches a tag up to one of its url-valued attributes (href,
								        src, poster, data, cite, formaction, action), the opening quote,
								        and a value that starts with the INTRASITE_LINK_REGEX setting.
								        The closing quote must equal the opening one (backreference to
								        group 2, ``quote``).

								        :return: the compiled pattern, exposing the groups ``markup``,
								                 ``quote``, ``path`` and ``value`` plus whatever groups
								                 the INTRASITE_LINK_REGEX setting defines.
								        """
								        intrasite_link_regex = self.settings['INTRASITE_LINK_REGEX']
								        regex = r"""

								            (?P<markup><[^\>]+  # match tag with all url-value attributes

								                (?:href|src|poster|data|cite|formaction|action)\s*=\s*)


								            (?P<quote>["\'])      # require value to be quoted

								            (?P<path>{}(?P<value>.*?))  # the url value

								            \2""".format(intrasite_link_regex)
								        # re.X: whitespace and '#' comments inside the pattern are ignored
								        return re.compile(regex, re.X)


								    def _update_content(self, content, siteurl):

								        """Update the content attribute.


								        Change all the relative paths of the content to relative paths

								        suitable for the output content.


								        :param content: content resource that will be passed to the templates.

								        :param siteurl: siteurl which is locally generated by the writer in

								                        case of RELATIVE_URLS.

								        """

								        if not content:

								            return content


								        hrefs = self._get_intrasite_link_regex()

								        return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)


								    def get_static_links(self):

								        static_links = set()

								        hrefs = self._get_intrasite_link_regex()

								        for m in hrefs.finditer(self._content):

								            what = m.group('what')

								            value = urlparse(m.group('value'))

								            path = value.path

								            if what not in {'static', 'attach'}:

								                continue

								            if path.startswith('/'):

								                path = path[1:]

								            else:

								                # relative to the source path of this content

								                path = self.get_relative_source_path(

								                    os.path.join(self.relative_dir, path)

								                )

								            path = path.replace('%20', ' ')

								            static_links.add(path)

								        return static_links


								    def get_siteurl(self):

								        return self._context.get('localsiteurl', '')


								    # NOTE(review): @memoized presumably caches the result per argument
								    # set -- confirm in pelican.utils.
								    @memoized
								    def get_content(self, siteurl):
								        """Return the content with its intrasite links rewritten.

								        The raw text comes from ``_get_content()`` when this object has
								        such an attribute, from ``_content`` otherwise.

								        :param siteurl: site URL handed to ``_update_content``.
								        """
								        raw = (self._get_content() if hasattr(self, '_get_content')
								               else self._content)
								        return self._update_content(raw, siteurl)


								    @property
								    def content(self):
								        """Content with links rewritten for the current site URL."""
								        return self.get_content(self.get_siteurl())


								    @memoized
								    def get_summary(self, siteurl):
								        """Returns the summary of an article.

								        The ``summary`` metadata is returned when set. Otherwise the
								        content is returned, whole when SUMMARY_MAX_LENGTH is None and
								        truncated to that many words (ending with SUMMARY_END_SUFFIX) when
								        it is not.

								        :param siteurl: not used by the body itself; the content is read
								                        through the ``content`` property.
								        """
								        meta = self.metadata
								        if 'summary' in meta:
								            return meta['summary']

								        max_length = self.settings['SUMMARY_MAX_LENGTH']
								        if max_length is None:
								            return self.content

								        return truncate_html_words(self.content,
								                                   max_length,
								                                   self.settings['SUMMARY_END_SUFFIX'])


								    @property
								    def summary(self):
								        """Summary for the current site URL, as built by get_summary()."""
								        return self.get_summary(self.get_siteurl())


								    def _get_summary(self):
								        """Deprecated accessor: log a warning, then return ``summary``."""

								        logger.warning('_get_summary() has been deprecated since 3.6.4. '
								                       'Use the summary decorator instead')
								        return self.summary


								    @summary.setter
								    def summary(self, value):
								        """Accept and discard assignments to ``summary``.

								        Nothing is stored: the getter always computes its result. The
								        setter lets ``setattr(self, 'summary', ...)`` succeed instead of
								        raising on a read-only property.
								        """
								        pass


								    @property
								    def status(self):
								        """Status string as stored by the setter, i.e. lower-cased."""
								        return self._status

								    @status.setter
								    def status(self, value):
								        """Store ``value`` lower-cased; it must provide ``lower()``."""
								        # TODO maybe typecheck
								        self._status = value.lower()


								    @property
								    def url(self):
								        """URL of this content: the override if set, else the expanded
								        ``url`` setting (see get_url_setting)."""
								        return self.get_url_setting('url')


								    @property
								    def save_as(self):
								        """Output path of this content: the override if set, else the
								        expanded ``save_as`` setting (see get_url_setting)."""
								        return self.get_url_setting('save_as')


								    def _get_template(self):

								        if hasattr(self, 'template') and self.template is not None:

								            return self.template

								        else:

								            return self.default_template


								    def get_relative_source_path(self, source_path=None):

								        """Return the relative path (from the content path) to the given

								        source_path.


								        If no source path is specified, use the source path of this

								        content object.

								        """

								        if not source_path:

								            source_path = self.source_path

								        if source_path is None:

								            return None


								        return posixize_path(

								            os.path.relpath(

								                os.path.abspath(os.path.join(

								                    self.settings['PATH'],

								                    source_path)),

								                os.path.abspath(self.settings['PATH'])

								            ))


								    @property
								    def relative_dir(self):
								        """Directory of the source file, relative to the PATH setting and
								        passed through posixize_path()."""
								        source = os.path.abspath(self.source_path)
								        content_root = os.path.abspath(self.settings['PATH'])
								        relative_source = os.path.relpath(source, content_root)
								        return posixize_path(os.path.dirname(relative_source))


								    def refresh_metadata_intersite_links(self):

								        for key in self.settings['FORMATTED_FIELDS']:

								            if key in self.metadata and key != 'summary':

								                value = self._update_content(

								                    self.metadata[key],

								                    self.get_siteurl()

								                )

								                self.metadata[key] = value

								                setattr(self, key.lower(), value)


								        # _summary is an internal variable that some plugins may be writing to,

								        # so ensure changes to it are picked up

								        if ('summary' in self.settings['FORMATTED_FIELDS'] and

								                'summary' in self.metadata):

								            self._summary = self._update_content(

								                self._summary,

								                self.get_siteurl()

								            )

								            self.metadata['summary'] = self._summary


								class Page(Content):
								    """Content with the 'page' template; only a title is mandatory."""

								    mandatory_properties = ('title',)
								    allowed_statuses = ('published', 'hidden', 'draft')
								    default_status = 'published'
								    default_template = 'page'

								    def _expand_settings(self, key):
								        """Expand the DRAFT_PAGE_* setting for drafts; otherwise let the
								        base class derive the prefix from the class name."""
								        if self.status == 'draft':
								            klass = 'draft_page'
								        else:
								            klass = None
								        return super()._expand_settings(key, klass)


								class Article(Content):
								    """Content with the 'article' template.

								    Title, date and category are mandatory. Unless WITH_FUTURE_DATES is
								    set, an article dated in the future is turned into a draft.
								    """

								    mandatory_properties = ('title', 'date', 'category')
								    allowed_statuses = ('published', 'draft')
								    default_status = 'published'
								    default_template = 'article'

								    def __init__(self, *args, **kwargs):
								        """Initialise as Content, then apply the draft rules for dates.

								        Accepts the same arguments as ``Content.__init__``.
								        """
								        super().__init__(*args, **kwargs)

								        # handle WITH_FUTURE_DATES (designate article to draft based on date)
								        if not self.settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'):
								            if self.date.tzinfo is None:
								                # naive date: compare against naive local time
								                now = datetime.datetime.now()
								            else:
								                # aware date: compare against aware UTC time; this form
								                # replaces utcnow(), deprecated since Python 3.12
								                now = datetime.datetime.now(datetime.timezone.utc)
								            if self.date > now:
								                self.status = 'draft'

								        # if we are a draft and there is no date provided, set max datetime
								        if not hasattr(self, 'date') and self.status == 'draft':
								            self.date = datetime.datetime.max.replace(tzinfo=self.timezone)

								    def _expand_settings(self, key):
								        """Expand the DRAFT_* setting for drafts, ARTICLE_* otherwise."""
								        klass = 'draft' if self.status == 'draft' else 'article'
								        return super()._expand_settings(key, klass)


								class Static(Content):
								    """Content subclass for static files.

								    It records whether its output location (``url`` or ``save_as``) has
								    been read, so that ``attach_to()`` can refuse to move a file whose
								    location something else may already rely on.
								    """

								    mandatory_properties = ('title',)
								    default_status = 'published'
								    default_template = None

								    def __init__(self, *args, **kwargs):
								        """Initialise as Content and mark the output location as not yet
								        referenced."""
								        super().__init__(*args, **kwargs)
								        # set to True by the url / save_as properties below
								        self._output_location_referenced = False

								    # Deprecated aliases kept since 3.2.0: 'filepath' and 'src' map to
								    # 'source_path', 'dst' maps to 'save_as'.
								    @deprecated_attribute(old='filepath', new='source_path', since=(3, 2, 0))
								    def filepath():
								        return None

								    @deprecated_attribute(old='src', new='source_path', since=(3, 2, 0))
								    def src():
								        return None

								    @deprecated_attribute(old='dst', new='save_as', since=(3, 2, 0))
								    def dst():
								        return None

								    @property
								    def url(self):
								        """URL of this file; reading it marks the location as referenced."""
								        # Note when url has been referenced, so we can avoid overriding it.
								        self._output_location_referenced = True
								        return super().url

								    @property
								    def save_as(self):
								        """Output path of this file; reading it marks the location as
								        referenced."""
								        # Note when save_as has been referenced, so we can avoid overriding it.
								        self._output_location_referenced = True
								        return super().save_as

								    def attach_to(self, content):
								        """Override our output directory with that of the given content object.

								        Nothing is changed (and a warning is logged when the location
								        would have differed) if an override already exists or if the
								        current location has already been referenced.

								        :param content: the linking content object; its ``source_path``
								                        and ``save_as`` are read.
								        """

								        # Determine our file's new output path relative to the linking
								        # document. If it currently lives beneath the linking
								        # document's source directory, preserve that relationship on output.
								        # Otherwise, make it a sibling.

								        linking_source_dir = os.path.dirname(content.source_path)
								        tail_path = os.path.relpath(self.source_path, linking_source_dir)
								        # a path climbing out of the linking directory keeps its name only
								        if tail_path.startswith(os.pardir + os.sep):
								            tail_path = os.path.basename(tail_path)
								        new_save_as = os.path.join(
								            os.path.dirname(content.save_as), tail_path)

								        # We do not build our new url by joining tail_path with the linking
								        # document's url, because we cannot know just by looking at the latter
								        # whether it points to the document itself or to its parent directory.
								        # (An url like 'some/content' might mean a directory named 'some'
								        # with a file named 'content', or it might mean a directory named
								        # 'some/content' with a file named 'index.html'.) Rather than trying
								        # to figure it out by comparing the linking document's url and save_as
								        # path, we simply build our new url from our new save_as path.

								        new_url = path_to_url(new_save_as)

								        def _log_reason(reason):
								            # shared warning for both refusal cases below
								            logger.warning(
								                "The {attach} link in %s cannot relocate "
								                "%s because %s. Falling back to "
								                "{filename} link behavior instead.",
								                content.get_relative_source_path(),
								                self.get_relative_source_path(), reason,
								                extra={'limit_msg': "More {attach} warnings silenced."})

								        # We never override an override, because we don't want to interfere
								        # with user-defined overrides that might be in EXTRA_PATH_METADATA.
								        if hasattr(self, 'override_save_as') or hasattr(self, 'override_url'):
								            # reading save_as / url here also marks the location as referenced
								            if new_save_as != self.save_as or new_url != self.url:
								                _log_reason("its output location was already overridden")
								            return

								        # We never change an output path that has already been referenced,
								        # because we don't want to break links that depend on that path.
								        if self._output_location_referenced:
								            if new_save_as != self.save_as or new_url != self.url:
								                _log_reason("another link already referenced its location")
								            return

								        self.override_save_as = new_save_as
								        self.override_url = new_url