import os from unittest.mock import patch from pelican import readers from pelican.tests.support import get_settings, unittest from pelican.utils import SafeDatetime CUR_DIR = os.path.dirname(__file__) CONTENT_PATH = os.path.join(CUR_DIR, 'content') def _path(*args): return os.path.join(CONTENT_PATH, *args) class ReaderTest(unittest.TestCase): def read_file(self, path, **kwargs): # Isolate from future API changes to readers.read_file r = readers.Readers(settings=get_settings(**kwargs)) return r.read_file(base_path=CONTENT_PATH, path=path) def assertDictHasSubset(self, dictionary, subset): for key, value in subset.items(): if key in dictionary: real_value = dictionary.get(key) self.assertEqual( value, real_value, 'Expected %s to have value %s, but was %s' % (key, value, real_value)) else: self.fail( 'Expected %s to have value %s, but was not in Dict' % (key, value)) class TestAssertDictHasSubset(ReaderTest): def setUp(self): self.dictionary = { 'key-a': 'val-a', 'key-b': 'val-b' } def tearDown(self): self.dictionary = None def test_subset(self): self.assertDictHasSubset(self.dictionary, {'key-a': 'val-a'}) def test_equal(self): self.assertDictHasSubset(self.dictionary, self.dictionary) def test_fail_not_set(self): self.assertRaisesRegex( AssertionError, r'Expected.*key-c.*to have value.*val-c.*but was not in Dict', self.assertDictHasSubset, self.dictionary, {'key-c': 'val-c'}) def test_fail_wrong_val(self): self.assertRaisesRegex( AssertionError, r'Expected .*key-a.* to have value .*val-b.* but was .*val-a.*', self.assertDictHasSubset, self.dictionary, {'key-a': 'val-b'}) class DefaultReaderTest(ReaderTest): def test_readfile_unknown_extension(self): with self.assertRaises(TypeError): self.read_file(path='article_with_metadata.unknownextension') def test_readfile_path_metadata_implicit_dates(self): test_file = 'article_with_metadata_implicit_dates.html' page = self.read_file(path=test_file, DEFAULT_DATE='fs') expected = { 'date': SafeDatetime.fromtimestamp( os.stat(_path(test_file)).st_mtime), 'modified': SafeDatetime.fromtimestamp( os.stat(_path(test_file)).st_mtime) } self.assertDictHasSubset(page.metadata, expected) def test_readfile_path_metadata_explicit_dates(self): test_file = 'article_with_metadata_explicit_dates.html' page = self.read_file(path=test_file, DEFAULT_DATE='fs') expected = { 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime(2010, 12, 31, 23, 59) } self.assertDictHasSubset(page.metadata, expected) def test_readfile_path_metadata_implicit_date_explicit_modified(self): test_file = 'article_with_metadata_implicit_date_explicit_modified.html' page = self.read_file(path=test_file, DEFAULT_DATE='fs') expected = { 'date': SafeDatetime.fromtimestamp( os.stat(_path(test_file)).st_mtime), 'modified': SafeDatetime(2010, 12, 2, 10, 14), } self.assertDictHasSubset(page.metadata, expected) def test_readfile_path_metadata_explicit_date_implicit_modified(self): test_file = 'article_with_metadata_explicit_date_implicit_modified.html' page = self.read_file(path=test_file, DEFAULT_DATE='fs') expected = { 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime.fromtimestamp( os.stat(_path(test_file)).st_mtime) } self.assertDictHasSubset(page.metadata, expected) def test_find_empty_alt(self): with patch('pelican.readers.logger') as log_mock: content = ['', ''] for tag in content: readers.find_empty_alt(tag, '/test/path') log_mock.warning.assert_called_with( 'Empty alt attribute for image %s in %s', 'test-image.png', '/test/path', extra={'limit_msg': 'Other images have empty alt attributes'} ) class RstReaderTest(ReaderTest): def test_article_with_metadata(self): page = self.read_file(path='article_with_metadata.rst') expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'This is a super article !', 'summary': '
Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to "typogrify' '"...
\n', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime(2010, 12, 2, 10, 20), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_capitalized_metadata(self): page = self.read_file(path='article_with_capitalized_metadata.rst') expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'This is a super article !', 'summary': 'Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to "typogrify' '"...
\n', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime(2010, 12, 2, 10, 20), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_filename_metadata(self): page = self.read_file( path='2012-11-29_rst_w_filename_meta#foo-bar.rst', FILENAME_METADATA=None) expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'Rst with filename metadata', 'reader': 'rst', } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path='2012-11-29_rst_w_filename_meta#foo-bar.rst', FILENAME_METADATA=r'(?PTHIS is some content. With some stuff to ' '"typogrify"...
\nNow with added ' 'support for ' 'TLA.
\n') self.assertEqual(page.content, expected) try: # otherwise, typogrify should be applied page = self.read_file(path='article.rst', TYPOGRIFY=True) expected = ( 'THIS is some content. ' 'With some stuff to “typogrify”…
\n' 'Now with added support for TLA.
\n') self.assertEqual(page.content, expected) except ImportError: return unittest.skip('need the typogrify distribution') def test_typogrify_summary(self): # if nothing is specified in the settings, the summary should be # unmodified page = self.read_file(path='article_with_metadata.rst') expected = ('Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to "typogrify' '"...
\n') self.assertEqual(page.metadata['summary'], expected) try: # otherwise, typogrify should be applied page = self.read_file(path='article_with_metadata.rst', TYPOGRIFY=True) expected = ('Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to “typogrify' '”…
\n') self.assertEqual(page.metadata['summary'], expected) except ImportError: return unittest.skip('need the typogrify distribution') def test_typogrify_ignore_tags(self): try: # typogrify should be able to ignore user specified tags, # but tries to be clever with widont extension page = self.read_file(path='article.rst', TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=['p']) expected = ('THIS is some content. With some stuff to ' '"typogrify"...
\nNow with added ' 'support for ' 'TLA.
\n') self.assertEqual(page.content, expected) # typogrify should ignore code blocks by default because # code blocks are composed inside the pre tag page = self.read_file(path='article_with_code_block.rst', TYPOGRIFY=True) expected = ('An article with some code
\n' ''
'x'
' &'
' y\n
A block quote:
\n\nx ' '& y\n' '
Normal:\nx' ' &' ' y' '
\n') self.assertEqual(page.content, expected) # instruct typogrify to also ignore blockquotes page = self.read_file(path='article_with_code_block.rst', TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=['blockquote']) expected = ('An article with some code
\n' ''
'x'
' &'
' y\n
A block quote:
\n\nx ' '& y\n' '
Normal:\nx' ' &' ' y' '
\n') self.assertEqual(page.content, expected) except ImportError: return unittest.skip('need the typogrify distribution') except TypeError: return unittest.skip('need typogrify version 2.0.4 or later') def test_article_with_multiple_authors(self): page = self.read_file(path='article_with_multiple_authors.rst') expected = { 'authors': ['First Author', 'Second Author'] } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_semicolon(self): page = self.read_file( path='article_with_multiple_authors_semicolon.rst') expected = { 'authors': ['Author, First', 'Author, Second'] } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_list(self): page = self.read_file(path='article_with_multiple_authors_list.rst') expected = { 'authors': ['Author, First', 'Author, Second'] } self.assertDictHasSubset(page.metadata, expected) def test_default_date_formats(self): tuple_date = self.read_file(path='article.rst', DEFAULT_DATE=(2012, 5, 1)) string_date = self.read_file(path='article.rst', DEFAULT_DATE='2012-05-01') self.assertEqual(tuple_date.metadata['date'], string_date.metadata['date']) def test_parse_error(self): # Verify that it raises an Exception, not nothing and not SystemExit or # some such with self.assertRaisesRegex(Exception, "underline too short"): self.read_file(path='../parse_error/parse_error.rst') def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path='article_with_typogrify_dashes.rst', TYPOGRIFY=True, TYPOGRIFY_DASHES='default') expected = "One: -; Two: —; Three: —-
\n" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path='article_with_typogrify_dashes.rst', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool') expected = "One: -; Two: –; Three: —
\n" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path='article_with_typogrify_dashes.rst', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool_inverted') expected = "One: -; Two: —; Three: –
\n" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) @unittest.skipUnless(readers.Markdown, "markdown isn't installed") class MdReaderTest(ReaderTest): def test_article_with_metadata(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read( _path('article_with_md_extension.md')) expected = { 'category': 'test', 'title': 'Test md File', 'summary': 'I have a lot to test
', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime(2010, 12, 2, 10, 20), 'tags': ['foo', 'bar', 'foobar'], } self.assertDictHasSubset(metadata, expected) content, metadata = reader.read( _path('article_with_markdown_and_nonascii_summary.md')) expected = { 'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定', 'summary': 'パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。
', 'category': '指導書', 'date': SafeDatetime(2012, 12, 20), 'modified': SafeDatetime(2012, 12, 22), 'tags': ['パイソン', 'マック'], 'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8', } self.assertDictHasSubset(metadata, expected) def test_article_with_footnote(self): settings = get_settings() ec = settings['MARKDOWN']['extension_configs'] ec['markdown.extensions.footnotes'] = {'SEPARATOR': '-'} reader = readers.MarkdownReader(settings) content, metadata = reader.read( _path('article_with_markdown_and_footnote.md')) expected_content = ( 'This is some content' '1' ' with some footnotes' '2
\n' '') expected_metadata = { 'title': 'Article with markdown containing footnotes', 'summary': ( 'Summary with inline markup ' 'should be supported.
'), 'date': SafeDatetime(2012, 10, 31), 'modified': SafeDatetime(2012, 11, 1), 'multiline': [ 'Line Metadata should be handle properly.', 'See syntax of Meta-Data extension of ' 'Python Markdown package:', 'If a line is indented by 4 or more spaces,', 'that line is assumed to be an additional line of the value', 'for the previous keyword.', 'A keyword may have as many lines as desired.', ] } self.assertEqual(content, expected_content) self.assertDictHasSubset(metadata, expected_metadata) def test_article_with_file_extensions(self): reader = readers.MarkdownReader(settings=get_settings()) # test to ensure the md file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_md_extension.md')) expected = ( "The quick brown fox jumped over the lazy dog's back.
") self.assertEqual(content, expected) # test to ensure the mkd file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_mkd_extension.mkd')) expected = ("This is another markdown test file. Uses" " the mkd extension.
") self.assertEqual(content, expected) # test to ensure the markdown file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_markdown_extension.markdown')) expected = ("This is another markdown test file. Uses" " the markdown extension.
") self.assertEqual(content, expected) # test to ensure the mdown file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_mdown_extension.mdown')) expected = ("This is another markdown test file. Uses" " the mdown extension.
") self.assertEqual(content, expected) def test_article_with_markdown_markup_extension(self): # test to ensure the markdown markup extension is being processed as # expected page = self.read_file( path='article_with_markdown_markup_extensions.md', MARKDOWN={ 'extension_configs': { 'markdown.extensions.toc': {}, 'markdown.extensions.codehilite': {}, 'markdown.extensions.extra': {} } } ) expected = ('\n' 'Test: This metadata value looks like metadata
', } self.assertDictHasSubset(metadata, expected) def test_empty_file(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read( _path('empty.md')) self.assertEqual(metadata, {}) self.assertEqual(content, '') def test_empty_file_with_bom(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read( _path('empty_with_bom.md')) self.assertEqual(metadata, {}) self.assertEqual(content, '') def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path='article_with_typogrify_dashes.md', TYPOGRIFY=True, TYPOGRIFY_DASHES='default') expected = "One: -; Two: —; Three: —-
" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path='article_with_typogrify_dashes.md', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool') expected = "One: -; Two: –; Three: —
" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path='article_with_typogrify_dashes.md', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool_inverted') expected = "One: -; Two: —; Three: –
" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) class HTMLReaderTest(ReaderTest): def test_article_with_comments(self): page = self.read_file(path='article_with_comments.html') self.assertEqual(''' Body content ''', page.content) def test_article_with_keywords(self): page = self.read_file(path='article_with_keywords.html') expected = { 'tags': ['foo', 'bar', 'foobar'], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata(self): page = self.read_file(path='article_with_metadata.html') expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'This is a super article !', 'summary': 'Summary and stuff', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_similar_metadata_tags(self): page = self.read_file(path='article_with_multiple_metadata_tags.html') expected = { 'custom_field': ['https://getpelican.com', 'https://www.eff.org'], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors(self): page = self.read_file(path='article_with_multiple_authors.html') expected = { 'authors': ['First Author', 'Second Author'] } self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata_and_contents_attrib(self): page = self.read_file(path='article_with_metadata_and_contents.html') expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'This is a super article !', 'summary': 'Summary and stuff', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_null_attributes(self): page = self.read_file(path='article_with_null_attributes.html') self.assertEqual(''' Ensure that empty attributes are copied properly. ''', page.content) def test_article_with_attributes_containing_double_quotes(self): page = self.read_file(path='article_with_attributes_containing_' + 'double_quotes.html') self.assertEqual(''' Ensure that if an attribute value contains a double quote, it is surrounded with single quotes, otherwise with double quotes. Span content Span content Span content ''', page.content) def test_article_metadata_key_lowercase(self): # Keys of metadata should be lowercase. page = self.read_file(path='article_with_uppercase_metadata.html') # Key should be lowercase self.assertIn('category', page.metadata, 'Key should be lowercase.') # Value should keep cases self.assertEqual('Yeah', page.metadata.get('category')) def test_article_with_nonconformant_meta_tags(self): page = self.read_file(path='article_with_nonconformant_meta_tags.html') expected = { 'summary': 'Summary and stuff', 'title': 'Article with Nonconformant HTML meta tags', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_inline_svg(self): page = self.read_file(path='article_with_inline_svg.html') expected = { 'title': 'Article with an inline SVG', } self.assertDictHasSubset(page.metadata, expected)