import copy
import datetime
import locale
import logging
import os
import re
from urllib.parse import urljoin, urlparse, urlunparse

import pytz

from pelican.plugins import signals
from pelican.settings import DEFAULT_CONFIG
from pelican.utils import (deprecated_attribute, memoized, path_to_url,
                           posixize_path, sanitised_join, set_date_tzinfo,
                           slugify, truncate_html_words)

# Import these so that they're available when you import from pelican.contents.
from pelican.urlwrappers import (Author, Category, Tag, URLWrapper)  # NOQA

logger = logging.getLogger(__name__)


class Content:
    """Base class of the content types defined in this module.

    The constructor copies every metadata entry onto the instance as an
    attribute and then derives defaults (author, language, slug, date
    format, timezone, status) from ``settings`` for whatever the metadata
    did not provide.

    :param content: the string to parse, containing the original content.
    :param metadata: the metadata associated to this page (optional).
    :param settings: the settings dictionary (optional).
    :param source_path: The location of the source of this content (if any).
    :param context: The shared context between generators.

    """
    # Legacy alias: ``filename`` is the old name of ``source_path``.
    # NOTE(review): the function body below looks like a placeholder; the
    # actual forwarding presumably lives in ``deprecated_attribute`` --
    # confirm against pelican.utils.
    @deprecated_attribute(old='filename', new='source_path', since=(3, 2, 0))
    def filename():
        return None

    def __init__(self, content, metadata=None, settings=None,
                 source_path=None, context=None):
        """Populate the instance from metadata and fill in setting defaults.

        :param content: the string to parse, containing the original content.
        :param metadata: mapping whose entries become instance attributes
                         (attribute names are lower-cased; ``save_as`` and
                         ``url`` are stored with an ``override_`` prefix).
        :param settings: the settings dictionary; a deep copy of
                         ``DEFAULT_CONFIG`` is used when omitted.
        :param source_path: The location of the source of this content
                            (if any).
        :param context: The shared context between generators; an empty
                        dict is used when omitted.
        """
        if metadata is None:
            metadata = {}
        if settings is None:
            settings = copy.deepcopy(DEFAULT_CONFIG)

        self.settings = settings
        self._content = content
        if context is None:
            context = {}
        self._context = context
        self.translations = []

        # work on a shallow copy of the metadata rather than the caller's
        # mapping
        local_metadata = dict()
        local_metadata.update(metadata)

        # set metadata as attributes; 'save_as' and 'url' are stored as
        # overrides because the class exposes read-only properties with
        # those names
        for key, value in local_metadata.items():
            if key in ('save_as', 'url'):
                key = 'override_' + key
            setattr(self, key.lower(), value)

        # also keep track of the metadata attributes available
        self.metadata = local_metadata

        # default template if it's not defined in page
        self.template = self._get_template()

        # First, read the authors from "authors", if not, fallback to "author"
        # and if not use the settings defined one, if any.
        if not hasattr(self, 'author'):
            if hasattr(self, 'authors'):
                self.author = self.authors[0]
            elif 'AUTHOR' in settings:
                self.author = Author(settings['AUTHOR'], settings)

        if not hasattr(self, 'authors') and hasattr(self, 'author'):
            self.authors = [self.author]

        # XXX Split all the following code into pieces, there is too much here.

        # manage languages: without a DEFAULT_LANG setting the content is
        # considered to be in the default language
        self.in_default_lang = True
        if 'DEFAULT_LANG' in settings:
            default_lang = settings['DEFAULT_LANG'].lower()
            if not hasattr(self, 'lang'):
                self.lang = default_lang

            self.in_default_lang = (self.lang == default_lang)

        # create the slug if not existing, generate slug according to
        # the SLUGIFY_SOURCE setting ('title' or 'basename'); any other
        # value, or a missing title / source path, leaves the slug unset
        if not hasattr(self, 'slug'):
            if (settings['SLUGIFY_SOURCE'] == 'title' and
                    hasattr(self, 'title')):
                value = self.title
            elif (settings['SLUGIFY_SOURCE'] == 'basename' and
                    source_path is not None):
                value = os.path.basename(os.path.splitext(source_path)[0])
            else:
                value = None
            if value is not None:
                self.slug = slugify(
                    value,
                    regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []),
                    preserve_case=settings.get('SLUGIFY_PRESERVE_CASE', False),
                    use_unicode=settings.get('SLUGIFY_USE_UNICODE', False))

        self.source_path = source_path
        self.relative_source_path = self.get_relative_source_path()

        # manage the date format: per-language format first, then the
        # global default
        if not hasattr(self, 'date_format'):
            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        # a (locale, format) tuple switches the locale (for all categories)
        # before the format string is used
        if isinstance(self.date_format, tuple):
            locale_string = self.date_format[0]
            locale.setlocale(locale.LC_ALL, locale_string)
            self.date_format = self.date_format[1]

        # manage timezone: a 'timezone' metadata entry wins over the
        # TIMEZONE setting; the attribute ends up as a pytz timezone object
        default_timezone = settings.get('TIMEZONE', 'UTC')
        timezone = getattr(self, 'timezone', default_timezone)
        self.timezone = pytz.timezone(timezone)

        # NOTE(review): set_date_tzinfo presumably attaches the timezone to
        # naive dates -- confirm against pelican.utils
        if hasattr(self, 'date'):
            self.date = set_date_tzinfo(self.date, timezone)
            self.locale_date = self.date.strftime(self.date_format)

        if hasattr(self, 'modified'):
            self.modified = set_date_tzinfo(self.modified, timezone)
            self.locale_modified = self.modified.strftime(self.date_format)

        # manage status
        if not hasattr(self, 'status'):
            # Previous default of None broke comment plugins and perhaps others
            self.status = getattr(self, 'default_status', '')

        # store the summary metadata if it is set
        if 'summary' in metadata:
            self._summary = metadata['summary']

        # emit the content_object_init signal with this object as sender
        signals.content_object_init.send(self)

    def __str__(self):
        """Return the source path when there is one, else the repr."""
        label = self.source_path
        return label if label else repr(self)

    def _has_valid_mandatory_properties(self):
        """Check that every mandatory property exists on this object.

        An error is logged for the first missing property only.

        :return: True when nothing is missing, False otherwise.
        """
        missing = next(
            (name for name in self.mandatory_properties
             if not hasattr(self, name)),
            None)
        if missing is None:
            return True

        logger.error(
            "Skipping %s: could not find information about '%s'",
            self, missing)
        return False

    def _has_valid_save_as(self):
        """Tell whether ``save_as`` stays inside the configured output path.

        :return: False when joining ``save_as`` onto ``OUTPUT_PATH`` is
                 rejected, True otherwise (including when the setting is
                 absent and the check cannot be made).
        """
        try:
            destination_root = self.settings["OUTPUT_PATH"]
        except KeyError:
            # nothing to check against
            return True

        try:
            sanitised_join(destination_root, self.save_as)
        except RuntimeError:
            # the joined path would land outside the output directory
            logger.error(
                "Skipping %s: file %r would be written outside output path",
                self,
                self.save_as,
            )
            return False
        else:
            return True

    def _has_valid_status(self):
        """Check ``status`` against ``allowed_statuses`` when it is defined.

        :return: False (after logging an error) for a status that is not
                 allowed; True otherwise.
        """
        # objects without an ``allowed_statuses`` attribute accept anything
        if not hasattr(self, 'allowed_statuses'):
            return True

        if self.status in self.allowed_statuses:
            return True

        logger.error(
            "Unknown status '%s' for file %s, skipping it.",
            self.status,
            self
        )
        return False

    def is_valid(self):
        """Run all validation checks and report whether every one passed."""
        # Each check is executed unconditionally before the results are
        # combined, so all of them get the chance to log their problem.
        mandatory_ok = self._has_valid_mandatory_properties()
        save_as_ok = self._has_valid_save_as()
        status_ok = self._has_valid_status()
        return all((mandatory_ok, save_as_ok, status_ok))

    @property
    def url_format(self):
        """Build the mapping used to fill URL / save_as format strings.

        Starts from a shallow copy of the metadata and sets the ``path``,
        ``slug``, ``lang``, ``date``, ``author`` and ``category`` entries,
        falling back to defaults for attributes this object lacks.
        """
        fields = copy.copy(self.metadata)
        source = self.metadata.get('path', self.get_relative_source_path())

        fields['path'] = path_to_url(source)
        fields['slug'] = getattr(self, 'slug', '')
        fields['lang'] = getattr(self, 'lang', 'en')
        fields['date'] = getattr(self, 'date', datetime.datetime.now())
        fields['author'] = self.author.slug if hasattr(self, 'author') else ''
        fields['category'] = (
            self.category.slug if hasattr(self, 'category') else '')
        return fields

    def _expand_settings(self, key, klass=None):
        """Look up the ``<KLASS>_<KEY>`` setting and format it.

        :param key: suffix of the setting name.
        :param klass: prefix of the setting name; the class name of this
                      object is used when it is empty or omitted.
        :return: the setting formatted with :attr:`url_format`.
        """
        prefix = klass if klass else self.__class__.__name__
        setting_name = '{}_{}'.format(prefix, key).upper()
        template = self.settings[setting_name]
        return template.format(**self.url_format)

    def get_url_setting(self, key):
        """Resolve ``key`` from an override attribute or from the settings.

        An ``override_<key>`` attribute wins; otherwise the setting is
        expanded, using the ``lang_<key>`` variant for content that is not
        in the default language.
        """
        override_attr = 'override_' + key
        if hasattr(self, override_attr):
            return getattr(self, override_attr)

        if not self.in_default_lang:
            key = 'lang_%s' % key
        return self._expand_settings(key)

    def _link_replacer(self, siteurl, m):
        """Build the replacement text for one intra-site link match.

        :param siteurl: base URL that resolved targets are joined onto.
        :param m: match object from the intrasite link regex; the
                  ``markup``, ``quote``, ``path``, ``what`` and ``value``
                  groups are read.
        :return: markup, opening quote, rewritten URL and closing quote
                 concatenated. When the link cannot be resolved (target not
                 found or indicator not recognized) a warning is logged and
                 the link value is returned exactly as it was written.
        """
        what = m.group('what')
        value = urlparse(m.group('value'))
        path = value.path
        # Stays None until a replacement target has been resolved.
        origin = None

        # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
        # so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
        # properly get `../a.html`. However, os.path.join() produces
        # `baz/http://foo/bar.html` for join("baz", "http://foo/bar.html")
        # instead of correct "http://foo/bar.html", so one has to pick a side
        # as there is no silver bullet.
        if self.settings['RELATIVE_URLS']:
            joiner = os.path.join
        else:
            joiner = urljoin

            # However, it's not *that* simple: urljoin("blog", "index.html")
            # produces just `index.html` instead of `blog/index.html` (unlike
            # os.path.join()), so in order to get a correct answer one needs to
            # append a trailing slash to siteurl in that case. This also makes
            # the new behavior fully compatible with Pelican 3.7.1.
            if not siteurl.endswith('/'):
                siteurl += '/'

        # XXX Put this in a different location.
        if what in {'filename', 'static', 'attach'}:
            if path.startswith('/'):
                path = path[1:]
            else:
                # relative to the source path of this content
                path = self.get_relative_source_path(
                    os.path.join(self.relative_dir, path)
                )

            key = 'static_content' if what in ('static', 'attach')\
                else 'generated_content'

            def _get_linked_content(key, path):
                # Look the path up in the requested context mapping, then
                # retry with '%20' unescaped, and finally (for {filename}
                # links only) fall back to the static content mapping.
                try:
                    return self._context[key][path]
                except KeyError:
                    try:
                        # Markdown escapes spaces, try unescaping
                        return self._context[key][path.replace('%20', ' ')]
                    except KeyError:
                        if what == 'filename' and key == 'generated_content':
                            key = 'static_content'
                            linked_content = _get_linked_content(key, path)
                            if linked_content:
                                logger.warning(
                                    '{filename} used for linking to static'
                                    ' content %s in %s. Use {static} instead',
                                    path,
                                    self.get_relative_source_path())
                                return linked_content
                        return None

            linked_content = _get_linked_content(key, path)
            if linked_content:
                if what == 'attach':
                    linked_content.attach_to(self)
                origin = joiner(siteurl, linked_content.url)
                origin = origin.replace('\\', '/')  # for Windows paths.
            else:
                logger.warning(
                    "Unable to find '%s', skipping url replacement.",
                    value.geturl(), extra={
                        'limit_msg': ("Other resources were not found "
                                      "and their urls not replaced")})
        elif what == 'category':
            origin = joiner(siteurl, Category(path, self.settings).url)
        elif what == 'tag':
            origin = joiner(siteurl, Tag(path, self.settings).url)
        elif what == 'index':
            origin = joiner(siteurl, self.settings['INDEX_SAVE_AS'])
        elif what == 'author':
            origin = joiner(siteurl, Author(path, self.settings).url)
        else:
            logger.warning(
                "Replacement Indicator '%s' not recognized, "
                "skipping replacement",
                what)

        if origin is None:
            # Nothing was resolved. The raw ``path`` group already holds the
            # complete original value (params, query and fragment included),
            # so passing it through urlunparse() together with the parsed
            # components would emit those components twice. Keep the link
            # exactly as written instead.
            origin = m.group('path')
        else:
            # keep all other parts, such as query, fragment, etc.
            parts = list(value)
            parts[2] = origin
            origin = urlunparse(parts)

        return ''.join((m.group('markup'), m.group('quote'), origin,
                        m.group('quote')))

    def _get_intrasite_link_regex(self):
        """Compile the verbose pattern that matches intra-site link attributes.

        The ``INTRASITE_LINK_REGEX`` setting is spliced in as the prefix of
        the quoted attribute value.
        """
        link_marker = self.settings['INTRASITE_LINK_REGEX']
        pattern = r"""
            (?P<markup><[^\>]+  # match tag with all url-value attributes
                (?:href|src|poster|data|cite|formaction|action)\s*=\s*)

            (?P<quote>["\'])      # require value to be quoted
            (?P<path>{}(?P<value>.*?))  # the url value
            \2""".format(link_marker)
        return re.compile(pattern, re.VERBOSE)

    def _update_content(self, content, siteurl):
        """Rewrite the intra-site links found in ``content``.

        :param content: content resource that will be passed to the templates.
        :param siteurl: siteurl which is locally generated by the writer in
                        case of RELATIVE_URLS.
        :return: ``content`` with every matched link replaced; falsy input
                 is handed back untouched.
        """
        if not content:
            return content

        link_pattern = self._get_intrasite_link_regex()

        def _rewrite(match):
            return self._link_replacer(siteurl, match)

        return link_pattern.sub(_rewrite, content)

    def get_static_links(self):
        """Collect the paths targeted by ``static`` and ``attach`` links.

        :return: set of paths; a leading slash is stripped, other paths are
                 resolved against this content's directory, and ``%20`` is
                 turned back into a space.
        """
        links = set()
        pattern = self._get_intrasite_link_regex()
        for match in pattern.finditer(self._content):
            indicator = match.group('what')
            target = urlparse(match.group('value')).path
            if indicator in {'static', 'attach'}:
                if target.startswith('/'):
                    target = target[1:]
                else:
                    # relative to the source path of this content
                    joined = os.path.join(self.relative_dir, target)
                    target = self.get_relative_source_path(joined)
                links.add(target.replace('%20', ' '))
        return links

    def get_siteurl(self):
        """Return the ``localsiteurl`` context entry, or '' when it is unset."""
        shared_context = self._context
        return shared_context.get('localsiteurl', '')

    @memoized
    def get_content(self, siteurl):
        """Return the content with intra-site links rewritten for ``siteurl``.

        The raw text comes from ``_get_content()`` when this object has such
        an attribute, and from ``_content`` otherwise.
        """
        raw = (self._get_content() if hasattr(self, '_get_content')
               else self._content)
        return self._update_content(raw, siteurl)

    @property
    def content(self):
        """Content rendered against the site URL from :meth:`get_siteurl`."""
        siteurl = self.get_siteurl()
        return self.get_content(siteurl)

    @memoized
    def get_summary(self, siteurl):
        """Returns the summary of an article.

        This is based on the summary metadata if set, otherwise truncate the
        content.

        :param siteurl: site URL used to resolve intra-site links in the
                        content the summary is derived from.
        :return: the ``summary`` metadata entry when present; otherwise the
                 content, truncated to ``SUMMARY_MAX_LENGTH`` words unless
                 that setting is None.
        """
        if 'summary' in self.metadata:
            return self.metadata['summary']

        # Render with the siteurl that was passed in. Reading ``self.content``
        # here would ignore the argument and always use the site URL from
        # the context instead.
        content = self.get_content(siteurl)

        max_length = self.settings['SUMMARY_MAX_LENGTH']
        if max_length is None:
            return content

        return truncate_html_words(content,
                                   max_length,
                                   self.settings['SUMMARY_END_SUFFIX'])

    @property
    def summary(self):
        """Summary computed for the site URL from :meth:`get_siteurl`."""
        siteurl = self.get_siteurl()
        return self.get_summary(siteurl)

    def _get_summary(self):
        """Deprecated accessor: log a warning and return ``self.summary``."""
        message = ('_get_summary() has been deprecated since 3.6.4. '
                   'Use the summary decorator instead')
        logger.warning(message)
        return self.summary

    @summary.setter
    def summary(self, value):
        """Accept assignments to ``summary`` and discard the value."""
        return None

    @property
    def status(self):
        """The status as stored (lower-cased) by the setter."""
        current = self._status
        return current

    @status.setter
    def status(self, value):
        """Store ``value`` lower-cased."""
        # TODO maybe typecheck
        normalized = value.lower()
        self._status = normalized

    @property
    def url(self):
        """The ``url`` value resolved through :meth:`get_url_setting`."""
        setting_key = 'url'
        return self.get_url_setting(setting_key)

    @property
    def save_as(self):
        """The ``save_as`` value resolved through :meth:`get_url_setting`."""
        setting_key = 'save_as'
        return self.get_url_setting(setting_key)

    def _get_template(self):
        """Return the ``template`` attribute, or ``default_template``.

        The default is used when the attribute is missing or None.
        """
        chosen = getattr(self, 'template', None)
        if chosen is None:
            return self.default_template
        return chosen

    def get_relative_source_path(self, source_path=None):
        """Express ``source_path`` relative to the ``PATH`` setting.

        :param source_path: path to convert; this object's own
                            ``source_path`` is used when it is empty or
                            omitted.
        :return: the relative path passed through ``posixize_path``, or
                 None when no source path is available.
        """
        target = source_path if source_path else self.source_path
        if target is None:
            return None

        absolute_target = os.path.abspath(
            os.path.join(self.settings['PATH'], target))
        absolute_root = os.path.abspath(self.settings['PATH'])
        return posixize_path(os.path.relpath(absolute_target, absolute_root))

    @property
    def relative_dir(self):
        """Directory of the source file, relative to the ``PATH`` setting."""
        absolute_source = os.path.abspath(self.source_path)
        absolute_root = os.path.abspath(self.settings['PATH'])
        relative_source = os.path.relpath(absolute_source, absolute_root)
        return posixize_path(os.path.dirname(relative_source))

    def refresh_metadata_intersite_links(self):
        """Rewrite intra-site links inside the formatted metadata fields.

        Every ``FORMATTED_FIELDS`` entry present in the metadata (except
        ``summary``) is updated both in the metadata and as an attribute.
        The summary is refreshed separately from ``_summary``.
        """
        for field in self.settings['FORMATTED_FIELDS']:
            if field not in self.metadata or field == 'summary':
                continue
            refreshed = self._update_content(
                self.metadata[field],
                self.get_siteurl()
            )
            self.metadata[field] = refreshed
            setattr(self, field.lower(), refreshed)

        # _summary is an internal variable that some plugins may be writing to,
        # so ensure changes to it are picked up
        summary_is_formatted = 'summary' in self.settings['FORMATTED_FIELDS']
        if summary_is_formatted and 'summary' in self.metadata:
            self._summary = self._update_content(
                self._summary,
                self.get_siteurl()
            )
            self.metadata['summary'] = self._summary


class Page(Content):
    """Content subclass for pages.

    A page needs a title and may be ``published``, ``hidden`` or ``draft``.
    """
    mandatory_properties = ('title',)
    allowed_statuses = ('published', 'hidden', 'draft')
    default_status = 'published'
    default_template = 'page'

    def _expand_settings(self, key):
        """Expand ``key`` using the ``DRAFT_PAGE_*`` settings for drafts."""
        if self.status == 'draft':
            klass = 'draft_page'
        else:
            # let the base class derive the prefix from the class name
            klass = None
        return super()._expand_settings(key, klass)


class Article(Content):
    """Content subclass for articles.

    An article needs a title, a date and a category, and may be either
    ``published`` or ``draft``.
    """
    mandatory_properties = ('title', 'date', 'category')
    allowed_statuses = ('published', 'draft')
    default_status = 'published'
    default_template = 'article'

    def __init__(self, *args, **kwargs):
        """Initialize the content, then apply date-based draft handling.

        Arguments are passed to ``Content.__init__`` unchanged.
        """
        super().__init__(*args, **kwargs)

        # handle WITH_FUTURE_DATES (designate article to draft based on date)
        if not self.settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'):
            if self.date.tzinfo is None:
                # naive date: compare against naive local time
                now = datetime.datetime.now()
            else:
                # Build the aware current UTC time directly;
                # datetime.utcnow() is deprecated since Python 3.12.
                now = datetime.datetime.now(pytz.utc)
            if self.date > now:
                self.status = 'draft'

        # if we are a draft and there is no date provided, set max datetime
        if not hasattr(self, 'date') and self.status == 'draft':
            self.date = datetime.datetime.max.replace(tzinfo=self.timezone)

    def _expand_settings(self, key):
        """Expand ``key`` with the ``DRAFT_*`` or ``ARTICLE_*`` settings."""
        klass = 'draft' if self.status == 'draft' else 'article'
        return super()._expand_settings(key, klass)


class Static(Content):
    """Content subclass for static files.

    Tracks whether its output location (``url`` / ``save_as``) has already
    been read, so that :meth:`attach_to` can avoid moving a file that
    something already points at.
    """
    mandatory_properties = ('title',)
    default_status = 'published'
    default_template = None

    def __init__(self, *args, **kwargs):
        """Initialize the content and reset the "location referenced" flag."""
        super().__init__(*args, **kwargs)
        # flipped to True the first time ``url`` or ``save_as`` is read
        self._output_location_referenced = False

    @deprecated_attribute(old='filepath', new='source_path', since=(3, 2, 0))
    def filepath():
        return None

    @deprecated_attribute(old='src', new='source_path', since=(3, 2, 0))
    def src():
        return None

    @deprecated_attribute(old='dst', new='save_as', since=(3, 2, 0))
    def dst():
        return None

    @property
    def url(self):
        """URL of this file; reading it marks the location as referenced."""
        # Remember the access so attach_to() will not override it later.
        self._output_location_referenced = True
        return super().url

    @property
    def save_as(self):
        """Output path of this file; reading it marks it as referenced."""
        # Remember the access so attach_to() will not override it later.
        self._output_location_referenced = True
        return super().save_as

    def attach_to(self, content):
        """Move our output location next to that of ``content``.

        Nothing is changed when the location is already overridden or has
        already been referenced; a warning is logged in those cases if the
        requested location differs from the current one.

        :param content: the content object holding the {attach} link.
        """
        # Work out the new output path relative to the linking document.
        # A file living beneath the linking document's source directory
        # keeps that relative position; any other file becomes a sibling.
        source_dir = os.path.dirname(content.source_path)
        relative_tail = os.path.relpath(self.source_path, source_dir)
        if relative_tail.startswith(os.pardir + os.sep):
            relative_tail = os.path.basename(relative_tail)
        output_dir = os.path.dirname(content.save_as)
        new_save_as = os.path.join(output_dir, relative_tail)

        # The new url is derived from the new save_as path rather than from
        # the linking document's url: an url such as 'some/content' may name
        # a file 'content' in directory 'some', or a directory
        # 'some/content' holding 'index.html', and we cannot tell which
        # just by looking at it.
        new_url = path_to_url(new_save_as)

        if hasattr(self, 'override_save_as') or hasattr(self, 'override_url'):
            # Never override an override: it may be a user-defined one
            # coming from EXTRA_PATH_METADATA.
            blocker = "its output location was already overridden"
        elif self._output_location_referenced:
            # Never move a path that was already referenced: links that
            # depend on it would break.
            blocker = "another link already referenced its location"
        else:
            self.override_save_as = new_save_as
            self.override_url = new_url
            return

        if new_save_as != self.save_as or new_url != self.url:
            logger.warning(
                "The {attach} link in %s cannot relocate "
                "%s because %s. Falling back to "
                "{filename} link behavior instead.",
                content.get_relative_source_path(),
                self.get_relative_source_path(), blocker,
                extra={'limit_msg': "More {attach} warnings silenced."})