bots-as-digital-infrapunctures/bots-venv/lib/python3.7/site-packages/pelican/tests/test_importer.py


								import locale

								import os

								import re

								from posixpath import join as posix_join


								from pelican.settings import DEFAULT_CONFIG

								from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,

								                                   unittest)

								from pelican.tools.pelican_import import (blogger2fields, build_header,

								                                          build_markdown_header,

								                                          decode_wp_content,

								                                          download_attachments, fields2pelican,

								                                          get_attachments, wp2fields)

								from pelican.utils import path_to_file_url, slugify


								# Absolute path of the directory holding this module; every fixture used
								# below lives in its 'content' sub-directory.
								CUR_DIR = os.path.abspath(os.path.dirname(__file__))

								# Blog export samples handed to the importers under test.
								BLOGGER_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'bloggerexport.xml')
								WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')

								# Paired input / expected-output texts for the decode_wp_content test.
								WORDPRESS_ENCODED_CONTENT_SAMPLE = os.path.join(
								    CUR_DIR, 'content', 'wordpress_content_encoded')
								WORDPRESS_DECODED_CONTENT_SAMPLE = os.path.join(
								    CUR_DIR, 'content', 'wordpress_content_decoded')


								# BeautifulSoup is optional. When bs4 cannot be imported the name is bound
								# to False so the skipUnless(BeautifulSoup, ...) decorators below skip the
								# importer tests instead of failing at import time.
								try:
								    from bs4 import BeautifulSoup
								except ImportError:
								    BeautifulSoup = False  # NOQA

								# Same pattern for bs4's lxml builder module: LXML is either that module
								# or False, and is only used as the skipUnless(LXML, ...) condition.
								try:
								    import bs4.builder._lxml as LXML
								except ImportError:
								    LXML = False


								@skipIfNoExecutable(['pandoc', '--version'])
								@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
								class TestBloggerXmlImporter(unittest.TestCase):
								    """Run blogger2fields over the bundled Blogger export sample."""

								    def setUp(self):
								        # Remember the active locale, then pin 'C' for the test run.
								        previous_locale = locale.setlocale(locale.LC_ALL)
								        self.old_locale = previous_locale
								        locale.setlocale(locale.LC_ALL, 'C')
								        self.posts = blogger2fields(BLOGGER_XML_SAMPLE)

								    def tearDown(self):
								        # Put back the locale that setUp found.
								        locale.setlocale(locale.LC_ALL, self.old_locale)

								    def test_recognise_kind_and_title(self):
								        """Check that the importer only outputs pages, articles and
								        comments, that these are correctly identified and that titles
								        are correct.
								        """
								        entries = list(self.posts)

								        def titles_of(kind):
								            # Field 8 carries the kind, field 0 the title.
								            return {entry[0] for entry in entries if entry[8] == kind}

								        self.assertEqual({'page', 'article', 'comment'},
								                         {entry[8] for entry in entries})
								        self.assertEqual({'Test page', 'Test page 2'}, titles_of('page'))
								        self.assertEqual({'Black as Egypt\'s Night', 'The Steel Windpipe'},
								                         titles_of('article'))
								        self.assertEqual({'Mishka, always a pleasure to read your '
								                          'adventures!...'},
								                         titles_of('comment'))

								    def test_recognise_status_with_correct_filename(self):
								        """Check that the importer outputs only statuses 'published' and
								        'draft', that these are correctly identified and that filenames
								        are correct.
								        """
								        entries = list(self.posts)

								        def filenames_with(status):
								            # Field 7 carries the status, field 2 the filename.
								            return {entry[2] for entry in entries if entry[7] == status}

								        self.assertEqual({'published', 'draft'},
								                         {entry[7] for entry in entries})

								        # Drafts are named after their id.
								        self.assertEqual({'page-4386962582497458967',
								                          'post-1276418104709695660'},
								                         filenames_with('draft'))

								        # Published items are named after their URL, comments excepted.
								        self.assertEqual({'the-steel-windpipe',
								                          'test-page',
								                          'post-5590533389087749201'},
								                         filenames_with('published'))


								@skipIfNoExecutable(['pandoc', '--version'])
								@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
								class TestWordpressXmlImporter(unittest.TestCase):
								    """Tests for importing the bundled WordPress export sample.

								    Each post is consumed as a 10-field sequence laid out as
								    (title, content, filename, date, author, categories, tags, status,
								    kind, format); tests that index into a post rely on that order.
								    """

								    def setUp(self):
								        # Pin the 'C' locale for the test; tearDown restores the old one.
								        self.old_locale = locale.setlocale(locale.LC_ALL)
								        locale.setlocale(locale.LC_ALL, 'C')
								        self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
								        # NOTE(review): the second argument presumably keeps custom post
								        # types, judging by the custom-post tests below -- confirm
								        # against wp2fields.
								        self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)

								    def tearDown(self):
								        locale.setlocale(locale.LC_ALL, self.old_locale)

								    # -- helpers ----------------------------------------------------------

								    @staticmethod
								    def _read_text(path):
								        """Return the content of the file at *path*, decoded as UTF-8."""
								        with open(path, encoding='utf-8') as infile:
								            return infile.read()

								    @staticmethod
								    def _export(posts, out_markup, output_path, **options):
								        """Call fields2pelican wrapped in mute(True) and return a list.

								        The tests treat each item of the result as the name of one
								        generated file, in the same order as *posts*.
								        """
								        silent_f2p = mute(True)(fields2pelican)
								        return list(silent_f2p(posts, out_markup, output_path, **options))

								    @staticmethod
								    def _title_fname_kind(posts):
								        """Yield (title, filename, kind) for every post in *posts*.

								        All ten fields are unpacked on purpose: a post of any other
								        length makes the test error out.
								        """
								        for (title, content, fname, date, author,
								                categ, tags, status, kind, fmt) in posts:
								            yield title, fname, kind

								    def _pair_with_outputs(self, posts, fnames):
								        """Return (post, output name) pairs matched by position.

								        zip() silently stops at the shorter input, so first make sure no
								        post is left without an output name.
								        """
								        self.assertGreaterEqual(len(fnames), len(posts))
								        return zip(posts, fnames)

								    def _markdown_for(self, title_prefix):
								        """Export the posts whose title starts with *title_prefix* as
								        Markdown and return the text of the first generated file.
								        """
								        selected = [post for post in self.posts
								                    if post[0].startswith(title_prefix)]
								        with temporary_folder() as temp:
								            fnames = self._export(selected, 'markdown', temp)
								            return self._read_text(fnames[0])

								    # -- tests ------------------------------------------------------------

								    def test_ignore_empty_posts(self):
								        """No imported post has a blank title."""
								        # Materialise first: if wp2fields hands back a lazy iterator,
								        # asserting on it directly would pass even with no posts.
								        posts = list(self.posts)
								        self.assertTrue(posts)
								        for title, _fname, _kind in self._title_fname_kind(posts):
								            self.assertTrue(title.strip())

								    def test_recognise_page_kind(self):
								        """ Check that we recognise pages in wordpress, as opposed to posts """
								        posts = list(self.posts)
								        self.assertTrue(posts)
								        # Collect (title, filename) of the posts recognised as page
								        pages_data = [(title, fname)
								                      for title, fname, kind
								                      in self._title_fname_kind(posts)
								                      if kind == 'page']
								        self.assertEqual([('Page', 'contact'), ('Empty Page', 'empty')],
								                         pages_data)

								    def test_dirpage_directive_for_page_kind(self):
								        """With dirpage=True a page is written under a 'pages' directory."""
								        test_post = [post for post in self.posts
								                     if post[0].startswith("Empty Page")]
								        with temporary_folder() as temp:
								            fname = self._export(test_post, 'markdown',
								                                 temp, dirpage=True)[0]
								            self.assertTrue(fname.endswith('pages%sempty.md' % os.path.sep))

								    def test_dircat(self):
								        """With dircat=True each post lands in a directory named after the
								        slug of its first category.
								        """
								        # Only posts that have at least one category are exported.
								        test_posts = [post for post in self.posts if post[5]]
								        with temporary_folder() as temp:
								            fnames = self._export(test_posts, 'markdown',
								                                  temp, dircat=True)
								        subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
								        for post, out_name in self._pair_with_outputs(test_posts, fnames):
								            category = slugify(post[5][0], regex_subs=subs,
								                               preserve_case=True)
								            filename = os.path.join(category, post[2] + '.md')
								            self.assertTrue(out_name.endswith(filename), out_name)

								    def test_unless_custom_post_all_items_should_be_pages_or_posts(self):
								        """Without the custom-post flag every item is a page or an article."""
								        posts = list(self.posts)
								        self.assertTrue(posts)
								        unexpected = [(title, fname)
								                      for title, fname, kind
								                      in self._title_fname_kind(posts)
								                      if kind not in ('page', 'article')]
								        self.assertEqual([], unexpected)

								    def test_recognise_custom_post_type(self):
								        """With the custom-post flag the custom kinds are reported as is."""
								        custposts = list(self.custposts)
								        self.assertTrue(custposts)
								        cust_data = [(title, kind)
								                     for title, _fname, kind
								                     in self._title_fname_kind(custposts)
								                     if kind not in ('article', 'page')]
								        self.assertEqual(
								            [('A custom post in category 4', 'custom1'),
								             ('A custom post in category 5', 'custom1'),
								             ('A 2nd custom post type also in category 5', 'custom2')],
								            cust_data)

								    def test_custom_posts_put_in_own_dir(self):
								        """With wp_custpost=True a custom post is written under a
								        directory named after its kind.
								        """
								        test_posts = [post for post in self.custposts
								                      if post[8] not in ('article', 'page')]
								        with temporary_folder() as temp:
								            fnames = self._export(test_posts, 'markdown',
								                                  temp, wp_custpost=True)
								        for post, out_name in self._pair_with_outputs(test_posts, fnames):
								            filename = os.path.join(post[8], post[2] + '.md')
								            self.assertTrue(out_name.endswith(filename), out_name)

								    def test_custom_posts_put_in_own_dir_and_catagory_sub_dir(self):
								        """With wp_custpost=True and dircat=True a custom post is written
								        under <kind>/<category slug>/.
								        """
								        test_posts = [post for post in self.custposts
								                      if post[8] not in ('article', 'page')]
								        with temporary_folder() as temp:
								            fnames = self._export(test_posts, 'markdown', temp,
								                                  wp_custpost=True, dircat=True)
								        subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
								        for post, out_name in self._pair_with_outputs(test_posts, fnames):
								            category = slugify(post[5][0], regex_subs=subs,
								                               preserve_case=True)
								            filename = os.path.join(post[8], category, post[2] + '.md')
								            self.assertTrue(out_name.endswith(filename), out_name)

								    def test_wp_custpost_true_dirpage_false(self):
								        """Pages only get their own directory when dirpage is True."""
								        test_posts = [post for post in self.custposts
								                      if post[8] == 'page']
								        with temporary_folder() as temp:
								            fnames = self._export(test_posts, 'markdown', temp,
								                                  wp_custpost=True, dirpage=False)
								        # Every page is compared with its own output name; the previous
								        # hand-kept index was never advanced, so only the first output
								        # was ever looked at.
								        for post, out_name in self._pair_with_outputs(test_posts, fnames):
								            filename = os.path.join('pages', post[2] + '.md')
								            self.assertFalse(out_name.endswith(filename), out_name)

								    def test_can_toggle_raw_html_code_parsing(self):
								        """strip_raw removes raw '<iframe' markup from Markdown output;
								        reST output never contains it.
								        """
								        test_posts = list(self.posts)

								        with temporary_folder() as temp:

								            def any_has_iframe(out_markup, **options):
								                # True when at least one generated file contains '<iframe'.
								                fnames = self._export(test_posts, out_markup,
								                                      temp, **options)
								                return any('<iframe' in self._read_text(fname)
								                           for fname in fnames)

								            self.assertTrue(any_has_iframe('markdown'))
								            self.assertFalse(any_has_iframe('markdown', strip_raw=True))
								            # no effect in rst
								            self.assertFalse(any_has_iframe('rst'))
								            self.assertFalse(any_has_iframe('rst', strip_raw=True))

								    def test_decode_html_entities_in_titles(self):
								        """HTML entities in a title are decoded by the importer."""
								        test_posts = [post for post
								                      in self.posts if post[2] == 'html-entity-test']
								        self.assertEqual(len(test_posts), 1)

								        title = test_posts[0][0]
								        # assertEqual rather than assertTrue: assertTrue would take the
								        # expected text as its failure message and merely check that the
								        # title is non-empty.
								        self.assertEqual(title,
								                         "A normal post with some <html> entities in "
								                         "the title. You can't miss them.")
								        self.assertNotIn('&', title)

								    def test_decode_wp_content_returns_empty(self):
								        """ Check that given an empty string we return an empty string."""
								        self.assertEqual(decode_wp_content(""), "")

								    def test_decode_wp_content(self):
								        """ Check that we can decode a wordpress content string."""
								        # Explicit encoding: setUp switches to the 'C' locale, and open()
								        # otherwise falls back to a locale-dependent default.
								        with open(WORDPRESS_ENCODED_CONTENT_SAMPLE,
								                  encoding='utf-8') as encoded_file, \
								                open(WORDPRESS_DECODED_CONTENT_SAMPLE,
								                     encoding='utf-8') as decoded_file:
								            encoded_content = encoded_file.read()
								            decoded_content = decoded_file.read()
								        self.assertEqual(
								            decode_wp_content(encoded_content, br=False),
								            decoded_content)

								    def test_preserve_verbatim_formatting(self):
								        """Code lines stay indented and nested lines stay deeper than
								        their parent in the Markdown output.
								        """
								        md = self._markdown_for("Code in List")
								        self.assertRegex(md, r'\s+a = \[1, 2, 3\]')
								        self.assertRegex(md, r'\s+b = \[4, 5, 6\]')

								        # Each match starts with the whitespace preceding the statement,
								        # so rindex() compares how deeply the two lines are indented.
								        for_line = re.search(r'\s+for i in zip\(a, b\):', md).group(0)
								        print_line = re.search(r'\s+print i', md).group(0)
								        self.assertLess(for_line.rindex('for'),
								                        print_line.rindex('print'))

								    def test_code_in_list(self):
								        """Code inside a list item is indented deeper than the item text."""
								        md = self._markdown_for("Code in List")
								        sample_line = re.search(r'-   This is a code sample', md).group(0)
								        code_line = re.search(r'\s+a = \[1, 2, 3\]', md).group(0)
								        self.assertLess(sample_line.rindex('This'), code_line.rindex('a'))

								    def test_dont_use_smart_quotes(self):
								        """The Markdown output contains no backslash-escaped quotes."""
								        md = self._markdown_for("Post with raw data")
								        self.assertIsNone(re.search(r'\\[\'"“”‘’]', md))


								class TestBuildHeader(unittest.TestCase):
								    """Check the text produced by build_header and build_markdown_header."""

								    def test_build_header(self):
								        """A bare title gives the title, a '#' underline and a blank line."""
								        no_fields = (None,) * 5
								        result = build_header('test', *no_fields)
								        self.assertEqual(result, 'test\n####\n\n')

								    def test_build_header_with_fields(self):
								        """All supplied fields show up in both header flavours."""
								        fields = (
								            'Test Post',
								            '2014-11-04',
								            'Alexis Métaireau',
								            ['Programming'],
								            ['Pelican', 'Python'],
								            'test-post',
								        )

								        md_lines = [
								            'Title: Test Post',
								            'Date: 2014-11-04',
								            'Author: Alexis Métaireau',
								            'Category: Programming',
								            'Tags: Pelican, Python',
								            'Slug: test-post',
								            '\n',
								        ]
								        rst_lines = [
								            'Test Post',
								            '#########',
								            ':date: 2014-11-04',
								            ':author: Alexis Métaireau',
								            ':category: Programming',
								            ':tags: Pelican, Python',
								            ':slug: test-post',
								            '\n',
								        ]

								        self.assertEqual(build_header(*fields), '\n'.join(rst_lines))
								        self.assertEqual(build_markdown_header(*fields),
								                         '\n'.join(md_lines))

								    def test_build_header_with_east_asian_characters(self):
								        """The underline of a title made only of wide characters is twice
								        as long as the title.
								        """
								        expected = ('これは広い幅の文字だけで構成されたタイトルです\n'
								                    '##############################################'
								                    '\n\n')
								        result = build_header('これは広い幅の文字だけで構成されたタイトルです',
								                              None, None, None, None, None)
								        self.assertEqual(result, expected)

								    def test_galleries_added_to_header(self):
								        """Attachments are listed in an ':attachments:' field."""
								        outputs = ['output/test1', 'output/test2']
								        result = build_header('test', None, None, None, None, None,
								                              attachments=outputs)
								        self.assertEqual(result, ('test\n####\n'
								                                  ':attachments: output/test1, '
								                                  'output/test2\n\n'))

								    def test_galleries_added_to_markdown_header(self):
								        """Attachments are listed in an 'Attachments:' line."""
								        outputs = ['output/test1',
								                   'output/test2']
								        result = build_markdown_header('test', None, None, None, None, None,
								                                       attachments=outputs)
								        self.assertEqual(
								            result,
								            'Title: test\nAttachments: output/test1, output/test2\n\n')


								@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
								@unittest.skipUnless(LXML, 'Needs lxml module')
								class TestWordpressXMLAttachements(unittest.TestCase):
								    """Tests for get_attachments and download_attachments.

								    get_attachments is used as a mapping whose keys are a post filename
								    or None and whose values are sets of attachment URLs.
								    """

								    def setUp(self):
								        # Pin the 'C' locale for the test; tearDown restores the old one.
								        self.old_locale = locale.setlocale(locale.LC_ALL)
								        locale.setlocale(locale.LC_ALL, 'C')
								        self.attachments = get_attachments(WORDPRESS_XML_SAMPLE)

								    def tearDown(self):
								        locale.setlocale(locale.LC_ALL, self.old_locale)

								    def test_recognise_attachments(self):
								        """The sample export yields attachments for exactly three keys."""
								        self.assertTrue(self.attachments)
								        # assertEqual reports the actual count when it is not 3.
								        self.assertEqual(3, len(self.attachments))

								    def test_attachments_associated_with_correct_post(self):
								        """Every key maps to the URLs expected for that post."""
								        self.assertTrue(self.attachments)
								        expected_by_post = {
								            None: {
								                ('https://upload.wikimedia.org/wikipedia/commons/'
								                 'thumb/2/2c/Pelican_lakes_entrance02.jpg/'
								                 '240px-Pelican_lakes_entrance02.jpg')
								            },
								            'with-excerpt': {
								                ('http://thisurlisinvalid.notarealdomain/'
								                 'not_an_image.jpg'),
								                ('http://en.wikipedia.org/wiki/'
								                 'File:Pelikan_Walvis_Bay.jpg'),
								            },
								            'with-tags': {
								                'http://thisurlisinvalid.notarealdomain',
								            },
								        }
								        for post, urls in self.attachments.items():
								            if post not in expected_by_post:
								                self.fail('all attachments should match to a '
								                          'filename or None, {}'
								                          .format(post))
								            self.assertEqual(urls, expected_by_post[post])

								    def test_download_attachments(self):
								        """Of one good and one bad URL, only the good one yields a location,
								        and that location keeps the 'content/article.rst' tail.
								        """
								        real_file = os.path.join(CUR_DIR, 'content/article.rst')
								        good_url = path_to_file_url(real_file)
								        bad_url = 'http://localhost:1/not_a_file.txt'
								        silent_da = mute()(download_attachments)
								        with temporary_folder() as temp:
								            locations = list(silent_da(temp, [good_url, bad_url]))
								            self.assertEqual(1, len(locations))
								            directory = locations[0]
								            # The location is checked with POSIX separators; it is also
								            # passed as the failure message so a mismatch shows the path.
								            self.assertTrue(
								                directory.endswith(posix_join('content', 'article.rst')),
								                directory)