"""Tests for the Pelican content readers (reST, Markdown and HTML).

NOTE(review): the copy of this module that was reviewed had lost its
line structure and the HTML markup / character entities / repeated
whitespace inside its string literals.  The statement structure below is
restored from the visible tokens; the markup inside the ``expected``
literals and the regex group names are reconstructed and must be
confirmed by running the suite against the fixtures in ``content/``.
"""
import os
from unittest.mock import patch

from pelican import readers
from pelican.tests.support import get_settings, unittest
from pelican.utils import SafeDatetime


CUR_DIR = os.path.dirname(__file__)
CONTENT_PATH = os.path.join(CUR_DIR, 'content')


def _path(*args):
    """Return the path of a fixture located below ``CONTENT_PATH``."""
    return os.path.join(CONTENT_PATH, *args)


class ReaderTest(unittest.TestCase):
    """Base class providing helpers shared by all reader test cases."""

    def read_file(self, path, **kwargs):
        """Read ``path`` (relative to ``CONTENT_PATH``) through ``Readers``.

        ``kwargs`` are forwarded to ``get_settings`` as setting overrides.
        """
        # Isolate from future API changes to readers.read_file
        r = readers.Readers(settings=get_settings(**kwargs))
        return r.read_file(base_path=CONTENT_PATH, path=path)

    def assertDictHasSubset(self, dictionary, subset):
        """Fail unless every key/value pair of ``subset`` is in ``dictionary``.

        The failure messages are matched by ``TestAssertDictHasSubset``;
        keep their wording unchanged.
        """
        for key, value in subset.items():
            if key in dictionary:
                real_value = dictionary.get(key)
                self.assertEqual(
                    value,
                    real_value,
                    'Expected %s to have value %s, but was %s' %
                    (key, value, real_value))
            else:
                self.fail(
                    'Expected %s to have value %s, but was not in Dict' %
                    (key, value))


class TestAssertDictHasSubset(ReaderTest):
    """Self-tests for ``ReaderTest.assertDictHasSubset``."""

    def setUp(self):
        self.dictionary = {
            'key-a': 'val-a',
            'key-b': 'val-b'
        }

    def tearDown(self):
        self.dictionary = None

    def test_subset(self):
        self.assertDictHasSubset(self.dictionary, {'key-a': 'val-a'})

    def test_equal(self):
        self.assertDictHasSubset(self.dictionary, self.dictionary)

    def test_fail_not_set(self):
        self.assertRaisesRegex(
            AssertionError,
            r'Expected.*key-c.*to have value.*val-c.*but was not in Dict',
            self.assertDictHasSubset,
            self.dictionary,
            {'key-c': 'val-c'})

    def test_fail_wrong_val(self):
        self.assertRaisesRegex(
            AssertionError,
            r'Expected .*key-a.* to have value .*val-b.* but was .*val-a.*',
            self.assertDictHasSubset,
            self.dictionary,
            {'key-a': 'val-b'})


class DefaultReaderTest(ReaderTest):
    """Tests that do not depend on one specific markup reader."""

    def test_readfile_unknown_extension(self):
        with self.assertRaises(TypeError):
            self.read_file(path='article_with_metadata.unknownextension')

    def test_readfile_path_metadata_implicit_dates(self):
        test_file = 'article_with_metadata_implicit_dates.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        expected = {
            'date': SafeDatetime.fromtimestamp(
                os.stat(_path(test_file)).st_mtime),
            'modified': SafeDatetime.fromtimestamp(
                os.stat(_path(test_file)).st_mtime)
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_explicit_dates(self):
        test_file = 'article_with_metadata_explicit_dates.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        expected = {
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 31, 23, 59)
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_implicit_date_explicit_modified(self):
        test_file = 'article_with_metadata_implicit_date_explicit_modified.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        expected = {
            'date': SafeDatetime.fromtimestamp(
                os.stat(_path(test_file)).st_mtime),
            'modified': SafeDatetime(2010, 12, 2, 10, 14),
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_explicit_date_implicit_modified(self):
        test_file = 'article_with_metadata_explicit_date_implicit_modified.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        expected = {
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime.fromtimestamp(
                os.stat(_path(test_file)).st_mtime)
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_find_empty_alt(self):
        with patch('pelican.readers.logger') as log_mock:
            # NOTE(review): both tags were empty strings in the reviewed
            # copy; reconstructed as <img> tags with an empty alt so that
            # the asserted 'test-image.png' warning can be produced.
            content = ['<img alt="" src="test-image.png" width="300px" />',
                       '<img src="test-image.png"  width="300px" alt="" />']

            for tag in content:
                readers.find_empty_alt(tag, '/test/path')
                log_mock.warning.assert_called_with(
                    'Empty alt attribute for image %s in %s',
                    'test-image.png',
                    '/test/path',
                    extra={'limit_msg':
                           'Other images have empty alt attributes'}
                )


class RstReaderTest(ReaderTest):
    """Tests for the reStructuredText reader."""

    def test_article_with_metadata(self):
        page = self.read_file(path='article_with_metadata.rst')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'This is a super article !',
            'summary': '<p class="first last">Multi-line metadata should be'
                       ' supported\nas well as <strong>inline'
                       ' markup</strong> and stuff to &quot;typogrify'
                       '&quot;...</p>\n',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 2, 10, 20),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_capitalized_metadata(self):
        page = self.read_file(path='article_with_capitalized_metadata.rst')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'This is a super article !',
            'summary': '<p class="first last">Multi-line metadata should be'
                       ' supported\nas well as <strong>inline'
                       ' markup</strong> and stuff to &quot;typogrify'
                       '&quot;...</p>\n',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 2, 10, 20),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_filename_metadata(self):
        page = self.read_file(
            path='2012-11-29_rst_w_filename_meta#foo-bar.rst',
            FILENAME_METADATA=None)
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

        page = self.read_file(
            path='2012-11-29_rst_w_filename_meta#foo-bar.rst',
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2}).*')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

        # Group names follow the asserted keys ('date', 'slug', 'mymeta');
        # NOTE(review): the name of the last group is presumed to be
        # 'author' -- confirm.
        page = self.read_file(
            path='2012-11-29_rst_w_filename_meta#foo-bar.rst',
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<author>.*)'))
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'slug': 'rst_w_filename_meta',
            'mymeta': 'foo',
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_optional_filename_metadata(self):
        page = self.read_file(
            path='2012-11-29_rst_w_filename_meta#foo-bar.rst',
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2})?')
        expected = {
            'date': SafeDatetime(2012, 11, 29),
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

        page = self.read_file(
            path='article.rst',
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2})?')
        expected = {
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)
        self.assertNotIn('date', page.metadata, 'Date should not be set.')

    def test_article_metadata_key_lowercase(self):
        # Keys of metadata should be lowercase.
        reader = readers.RstReader(settings=get_settings())
        content, metadata = reader.read(
            _path('article_with_uppercase_metadata.rst'))

        self.assertIn('category', metadata, 'Key should be lowercase.')
        self.assertEqual('Yeah', metadata.get('category'),
                         'Value keeps case.')

    def test_article_extra_path_metadata(self):
        input_with_metadata = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
        page_metadata = self.read_file(
            path=input_with_metadata,
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<author>.*)'
            ),
            EXTRA_PATH_METADATA={
                input_with_metadata: {
                    'key-1a': 'value-1a',
                    'key-1b': 'value-1b'
                }
            }
        )
        expected_metadata = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'slug': 'rst_w_filename_meta',
            'mymeta': 'foo',
            'reader': 'rst',
            'key-1a': 'value-1a',
            'key-1b': 'value-1b'
        }
        self.assertDictHasSubset(page_metadata.metadata, expected_metadata)

        input_file_path_without_metadata = 'article.rst'
        page_without_metadata = self.read_file(
            path=input_file_path_without_metadata,
            EXTRA_PATH_METADATA={
                input_file_path_without_metadata: {
                    'author': 'Charlès Overwrite'
                }
            }
        )
        expected_without_metadata = {
            'category': 'misc',
            'author': 'Charlès Overwrite',
            'title': 'Article title',
            'reader': 'rst',
        }
        self.assertDictHasSubset(
            page_without_metadata.metadata,
            expected_without_metadata)

    def test_article_extra_path_metadata_dont_overwrite(self):
        # EXTRA_PATH_METADATA['author'] should get ignored
        # since we don't overwrite already set values
        input_file_path = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
        page = self.read_file(
            path=input_file_path,
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<author>.*)'
            ),
            EXTRA_PATH_METADATA={
                input_file_path: {
                    'author': 'Charlès Overwrite',
                    'key-1b': 'value-1b'
                }
            }
        )
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'slug': 'rst_w_filename_meta',
            'mymeta': 'foo',
            'reader': 'rst',
            'key-1b': 'value-1b'
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_article_extra_path_metadata_recurse(self):
        parent = "TestCategory"
        notparent = "TestCategory/article"
        path = "TestCategory/article_without_category.rst"

        epm = {
            parent: {'epmr_inherit': parent,
                     'epmr_override': parent, },
            notparent: {'epmr_bogus': notparent},
            path: {'epmr_override': path, },
        }
        expected_metadata = {
            'epmr_inherit': parent,
            'epmr_override': path,
        }

        page = self.read_file(path=path, EXTRA_PATH_METADATA=epm)
        self.assertDictHasSubset(page.metadata, expected_metadata)

        # Make sure vars aren't getting "inherited" by mistake...
        path = "article.rst"
        page = self.read_file(path=path, EXTRA_PATH_METADATA=epm)
        for k in expected_metadata.keys():
            self.assertNotIn(k, page.metadata)

        # Same, but for edge cases where one file's name is a prefix of
        # another.
        path = "TestCategory/article_without_category.rst"
        page = self.read_file(path=path, EXTRA_PATH_METADATA=epm)
        for k in epm[notparent].keys():
            self.assertNotIn(k, page.metadata)

    def test_typogrify(self):
        # if nothing is specified in the settings, the content should be
        # unmodified
        page = self.read_file(path='article.rst')
        expected = ('<p>THIS is some content. With some stuff to '
                    '&quot;typogrify&quot;...</p>\n<p>Now with added '
                    'support for <abbr title="three letter acronym">'
                    'TLA</abbr>.</p>\n')

        self.assertEqual(page.content, expected)

        try:
            # otherwise, typogrify should be applied
            page = self.read_file(path='article.rst', TYPOGRIFY=True)
            expected = (
                '<p><span class="caps">THIS</span> is some content. '
                'With some stuff to&nbsp;&#8220;typogrify&#8221;&#8230;</p>\n'
                '<p>Now with added support for <abbr title="three letter '
                'acronym"><span class="caps">TLA</span></abbr>.</p>\n')

            self.assertEqual(page.content, expected)
        except ImportError:
            # skipTest() raises SkipTest so the run is reported as skipped;
            # returning unittest.skip(...) would only build an unused
            # decorator and the test would count as passed.
            self.skipTest('need the typogrify distribution')

    def test_typogrify_summary(self):
        # if nothing is specified in the settings, the summary should be
        # unmodified
        page = self.read_file(path='article_with_metadata.rst')
        expected = ('<p class="first last">Multi-line metadata should be'
                    ' supported\nas well as <strong>inline'
                    ' markup</strong> and stuff to &quot;typogrify'
                    '&quot;...</p>\n')

        self.assertEqual(page.metadata['summary'], expected)

        try:
            # otherwise, typogrify should be applied
            page = self.read_file(path='article_with_metadata.rst',
                                  TYPOGRIFY=True)
            expected = ('<p class="first last">Multi-line metadata should be'
                        ' supported\nas well as <strong>inline'
                        ' markup</strong> and stuff to&nbsp;&#8220;typogrify'
                        '&#8221;&#8230;</p>\n')

            self.assertEqual(page.metadata['summary'], expected)
        except ImportError:
            # See test_typogrify: skipTest() actually marks the test skipped.
            self.skipTest('need the typogrify distribution')

    def test_typogrify_ignore_tags(self):
        try:
            # typogrify should be able to ignore user specified tags,
            # but tries to be clever with widont extension
            page = self.read_file(path='article.rst', TYPOGRIFY=True,
                                  TYPOGRIFY_IGNORE_TAGS=['p'])
            expected = ('<p>THIS is some content. With some stuff to '
                        '&quot;typogrify&quot;...</p>\n<p>Now with added '
                        'support for <abbr title="three letter acronym">'
                        'TLA</abbr>.</p>\n')

            self.assertEqual(page.content, expected)

            # typogrify should ignore code blocks by default because
            # code blocks are composed inside the pre tag
            page = self.read_file(path='article_with_code_block.rst',
                                  TYPOGRIFY=True)

            expected = ('<p>An article with some&nbsp;code</p>\n'
                        '<div class="highlight"><pre><span></span>'
                        '<span class="n">x</span>'
                        ' <span class="o">&amp;</span>'
                        ' <span class="n">y</span>\n</pre></div>\n'
                        '<p>A block&nbsp;quote:</p>\n<blockquote>\nx '
                        '<span class="amp">&amp;</span> y</blockquote>\n'
                        '<p>Normal:\nx'
                        ' <span class="amp">&amp;</span>'
                        ' y'
                        '</p>\n')

            self.assertEqual(page.content, expected)

            # instruct typogrify to also ignore blockquotes
            page = self.read_file(path='article_with_code_block.rst',
                                  TYPOGRIFY=True,
                                  TYPOGRIFY_IGNORE_TAGS=['blockquote'])

            expected = ('<p>An article with some&nbsp;code</p>\n'
                        '<div class="highlight"><pre><span></span>'
                        '<span class="n">x</span>'
                        ' <span class="o">&amp;</span>'
                        ' <span class="n">y</span>\n</pre></div>\n'
                        '<p>A block&nbsp;quote:</p>\n<blockquote>\nx '
                        '&amp; y</blockquote>\n'
                        '<p>Normal:\nx'
                        ' <span class="amp">&amp;</span>'
                        ' y'
                        '</p>\n')

            self.assertEqual(page.content, expected)
        except ImportError:
            # See test_typogrify: skipTest() actually marks the test skipped.
            self.skipTest('need the typogrify distribution')
        except TypeError:
            self.skipTest('need typogrify version 2.0.4 or later')

    def test_article_with_multiple_authors(self):
        page = self.read_file(path='article_with_multiple_authors.rst')
        expected = {
            'authors': ['First Author', 'Second Author']
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_multiple_authors_semicolon(self):
        page = self.read_file(
            path='article_with_multiple_authors_semicolon.rst')
        expected = {
            'authors': ['Author, First', 'Author, Second']
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_multiple_authors_list(self):
        page = self.read_file(path='article_with_multiple_authors_list.rst')
        expected = {
            'authors': ['Author, First', 'Author, Second']
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_default_date_formats(self):
        tuple_date = self.read_file(path='article.rst',
                                    DEFAULT_DATE=(2012, 5, 1))
        string_date = self.read_file(path='article.rst',
                                     DEFAULT_DATE='2012-05-01')

        self.assertEqual(tuple_date.metadata['date'],
                         string_date.metadata['date'])

    def test_parse_error(self):
        # Verify that it raises an Exception, not nothing and not SystemExit or
        # some such
        with self.assertRaisesRegex(Exception, "underline too short"):
            self.read_file(path='../parse_error/parse_error.rst')

    def test_typogrify_dashes_config(self):
        # Test default config
        page = self.read_file(
            path='article_with_typogrify_dashes.rst',
            TYPOGRIFY=True,
            TYPOGRIFY_DASHES='default')
        expected = "<p>One: -; Two: &#8212;; Three:&nbsp;&#8212;-</p>\n"
        expected_title = "One -, two &#8212;, three &#8212;-&nbsp;dashes!"

        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)

        # Test 'oldschool' variant
        page = self.read_file(
            path='article_with_typogrify_dashes.rst',
            TYPOGRIFY=True,
            TYPOGRIFY_DASHES='oldschool')
        expected = "<p>One: -; Two: &#8211;; Three:&nbsp;&#8212;</p>\n"
        expected_title = "One -, two &#8211;, three &#8212;&nbsp;dashes!"

        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)

        # Test 'oldschool_inverted' variant
        page = self.read_file(
            path='article_with_typogrify_dashes.rst',
            TYPOGRIFY=True,
            TYPOGRIFY_DASHES='oldschool_inverted')
        expected = "<p>One: -; Two: &#8212;; Three:&nbsp;&#8211;</p>\n"
        expected_title = "One -, two &#8212;, three &#8211;&nbsp;dashes!"

        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)


@unittest.skipUnless(readers.Markdown, "markdown isn't installed")
class MdReaderTest(ReaderTest):
    """Tests for the Markdown reader (skipped when markdown is missing)."""

    def test_article_with_metadata(self):
        reader = readers.MarkdownReader(settings=get_settings())
        content, metadata = reader.read(
            _path('article_with_md_extension.md'))
        expected = {
            'category': 'test',
            'title': 'Test md File',
            'summary': '<p>I have a lot to test</p>',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 2, 10, 20),
            'tags': ['foo', 'bar', 'foobar'],
        }
        self.assertDictHasSubset(metadata, expected)

        content, metadata = reader.read(
            _path('article_with_markdown_and_nonascii_summary.md'))
        expected = {
            'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定',
            'summary': '<p>パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。</p>',
            'category': '指導書',
            'date': SafeDatetime(2012, 12, 20),
            'modified': SafeDatetime(2012, 12, 22),
            'tags': ['パイソン', 'マック'],
            'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8',
        }
        self.assertDictHasSubset(metadata, expected)

    def test_article_with_footnote(self):
        settings = get_settings()
        ec = settings['MARKDOWN']['extension_configs']
        ec['markdown.extensions.footnotes'] = {'SEPARATOR': '-'}
        reader = readers.MarkdownReader(settings)
        content, metadata = reader.read(
            _path('article_with_markdown_and_footnote.md'))
        expected_content = (
            '<p>This is some content'
            '<sup id="fnref-1"><a class="footnote-ref" href="#fn-1"'
            '>1</a></sup>'
            ' with some footnotes'
            '<sup id="fnref-footnote"><a class="footnote-ref" '
            'href="#fn-footnote">2</a></sup></p>\n'

            '<div class="footnote">\n'
            '<hr>\n<ol>\n<li id="fn-1">\n'
            '<p>Numbered footnote&#160;'
            '<a class="footnote-backref" href="#fnref-1" '
            'title="Jump back to footnote 1 in the text">&#8617;</a></p>\n'
            '</li>\n<li id="fn-footnote">\n'
            '<p>Named footnote&#160;'
            '<a class="footnote-backref" href="#fnref-footnote"'
            ' title="Jump back to footnote 2 in the text">&#8617;</a></p>\n'
            '</li>\n</ol>\n</div>')
        expected_metadata = {
            'title': 'Article with markdown containing footnotes',
            'summary': (
                '<p>Summary with <strong>inline</strong> markup '
                '<em>should</em> be supported.</p>'),
            'date': SafeDatetime(2012, 10, 31),
            'modified': SafeDatetime(2012, 11, 1),
            'multiline': [
                'Line Metadata should be handle properly.',
                'See syntax of Meta-Data extension of '
                'Python Markdown package:',
                'If a line is indented by 4 or more spaces,',
                'that line is assumed to be an additional line of the value',
                'for the previous keyword.',
                'A keyword may have as many lines as desired.',
            ]
        }
        self.assertEqual(content, expected_content)
        self.assertDictHasSubset(metadata, expected_metadata)

    def test_article_with_file_extensions(self):
        reader = readers.MarkdownReader(settings=get_settings())
        # test to ensure the md file extension is being processed by the
        # correct reader
        content, metadata = reader.read(
            _path('article_with_md_extension.md'))
        expected = (
            "<h1>Test Markdown File Header</h1>\n"
            "<h2>Used for pelican test</h2>\n"
            "<p>The quick brown fox jumped over the lazy dog's back.</p>")
        self.assertEqual(content, expected)
        # test to ensure the mkd file extension is being processed by the
        # correct reader
        content, metadata = reader.read(
            _path('article_with_mkd_extension.mkd'))
        expected = ("<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
                    " test</h2>\n<p>This is another markdown test file.  Uses"
                    " the mkd extension.</p>")
        self.assertEqual(content, expected)
        # test to ensure the markdown file extension is being processed by the
        # correct reader
        content, metadata = reader.read(
            _path('article_with_markdown_extension.markdown'))
        expected = ("<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
                    " test</h2>\n<p>This is another markdown test file.  Uses"
                    " the markdown extension.</p>")
        self.assertEqual(content, expected)
        # test to ensure the mdown file extension is being processed by the
        # correct reader
        content, metadata = reader.read(
            _path('article_with_mdown_extension.mdown'))
        expected = ("<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
                    " test</h2>\n<p>This is another markdown test file.  Uses"
                    " the mdown extension.</p>")
        self.assertEqual(content, expected)

    def test_article_with_markdown_markup_extension(self):
        # test to ensure the markdown markup extension is being processed as
        # expected
        page = self.read_file(
            path='article_with_markdown_markup_extensions.md',
            MARKDOWN={
                'extension_configs': {
                    'markdown.extensions.toc': {},
                    'markdown.extensions.codehilite': {},
                    'markdown.extensions.extra': {}
                }
            }
        )
        expected = ('<div class="toc">\n'
                    '<ul>\n'
                    '<li><a href="#level1">Level1</a><ul>\n'
                    '<li><a href="#level2">Level2</a></li>\n'
                    '</ul>\n'
                    '</li>\n'
                    '</ul>\n'
                    '</div>\n'
                    '<h2 id="level1">Level1</h2>\n'
                    '<h3 id="level2">Level2</h3>')

        self.assertEqual(page.content, expected)

    def test_article_with_filename_metadata(self):
        page = self.read_file(
            path='2012-11-30_md_w_filename_meta#foo-bar.md',
            FILENAME_METADATA=None)
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
        }
        self.assertDictHasSubset(page.metadata, expected)

        page = self.read_file(
            path='2012-11-30_md_w_filename_meta#foo-bar.md',
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2}).*')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'date': SafeDatetime(2012, 11, 30),
        }
        self.assertDictHasSubset(page.metadata, expected)

        # Group names follow the asserted keys ('date', 'slug', 'mymeta');
        # NOTE(review): the name of the last group is presumed to be
        # 'author' -- confirm.
        page = self.read_file(
            path='2012-11-30_md_w_filename_meta#foo-bar.md',
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<author>.*)'))
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'date': SafeDatetime(2012, 11, 30),
            'slug': 'md_w_filename_meta',
            'mymeta': 'foo',
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_optional_filename_metadata(self):
        page = self.read_file(
            path='2012-11-30_md_w_filename_meta#foo-bar.md',
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2})?')
        expected = {
            'date': SafeDatetime(2012, 11, 30),
            'reader': 'markdown',
        }
        self.assertDictHasSubset(page.metadata, expected)

        page = self.read_file(
            path='empty.md',
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2})?')
        expected = {
            'reader': 'markdown',
        }
        self.assertDictHasSubset(page.metadata, expected)
        self.assertNotIn('date', page.metadata, 'Date should not be set.')

    def test_duplicate_tags_or_authors_are_removed(self):
        reader = readers.MarkdownReader(settings=get_settings())
        content, metadata = reader.read(
            _path('article_with_duplicate_tags_authors.md'))
        expected = {
            'tags': ['foo', 'bar', 'foobar'],
            'authors': ['Author, First', 'Author, Second'],
        }
        self.assertDictHasSubset(metadata, expected)

    def test_metadata_not_parsed_for_metadata(self):
        settings = get_settings()
        settings['FORMATTED_FIELDS'] = ['summary']

        reader = readers.MarkdownReader(settings=settings)
        content, metadata = reader.read(
            _path('article_with_markdown_and_nested_metadata.md'))
        expected = {
            'title': 'Article with markdown and nested summary metadata',
            'summary': '<p>Test: This metadata value looks like metadata</p>',
        }
        self.assertDictHasSubset(metadata, expected)

    def test_empty_file(self):
        reader = readers.MarkdownReader(settings=get_settings())
        content, metadata = reader.read(
            _path('empty.md'))

        self.assertEqual(metadata, {})
        self.assertEqual(content, '')

    def test_empty_file_with_bom(self):
        reader = readers.MarkdownReader(settings=get_settings())
        content, metadata = reader.read(
            _path('empty_with_bom.md'))

        self.assertEqual(metadata, {})
        self.assertEqual(content, '')

    def test_typogrify_dashes_config(self):
        # Test default config
        page = self.read_file(
            path='article_with_typogrify_dashes.md',
            TYPOGRIFY=True,
            TYPOGRIFY_DASHES='default')
        expected = "<p>One: -; Two: &#8212;; Three:&nbsp;&#8212;-</p>"
        expected_title = "One -, two &#8212;, three &#8212;-&nbsp;dashes!"

        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)

        # Test 'oldschool' variant
        page = self.read_file(
            path='article_with_typogrify_dashes.md',
            TYPOGRIFY=True,
            TYPOGRIFY_DASHES='oldschool')
        expected = "<p>One: -; Two: &#8211;; Three:&nbsp;&#8212;</p>"
        expected_title = "One -, two &#8211;, three &#8212;&nbsp;dashes!"

        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)

        # Test 'oldschool_inverted' variant
        page = self.read_file(
            path='article_with_typogrify_dashes.md',
            TYPOGRIFY=True,
            TYPOGRIFY_DASHES='oldschool_inverted')
        expected = "<p>One: -; Two: &#8212;; Three:&nbsp;&#8211;</p>"
        expected_title = "One -, two &#8212;, three &#8211;&nbsp;dashes!"

        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)


class HTMLReaderTest(ReaderTest):
    """Tests for the HTML reader.

    NOTE(review): the triple-quoted expected bodies below are
    whitespace-sensitive and were reconstructed; confirm them against the
    corresponding fixtures.
    """

    def test_article_with_comments(self):
        page = self.read_file(path='article_with_comments.html')

        self.assertEqual('''
        Body content
        <!--  This comment is included (including extra whitespace)   -->
    ''', page.content)

    def test_article_with_keywords(self):
        page = self.read_file(path='article_with_keywords.html')
        expected = {
            'tags': ['foo', 'bar', 'foobar'],
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_metadata(self):
        page = self.read_file(path='article_with_metadata.html')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'This is a super article !',
            'summary': 'Summary and stuff',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_multiple_similar_metadata_tags(self):
        page = self.read_file(path='article_with_multiple_metadata_tags.html')
        expected = {
            'custom_field': ['https://getpelican.com', 'https://www.eff.org'],
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_multiple_authors(self):
        page = self.read_file(path='article_with_multiple_authors.html')
        expected = {
            'authors': ['First Author', 'Second Author']
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_metadata_and_contents_attrib(self):
        page = self.read_file(path='article_with_metadata_and_contents.html')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'This is a super article !',
            'summary': 'Summary and stuff',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_null_attributes(self):
        page = self.read_file(path='article_with_null_attributes.html')

        self.assertEqual('''
        Ensure that empty attributes are copied properly.
        <input name="test" disabled style="" />
    ''', page.content)

    def test_article_with_attributes_containing_double_quotes(self):
        page = self.read_file(path='article_with_attributes_containing_' +
                                   'double_quotes.html')
        self.assertEqual('''
        Ensure that if an attribute value contains a double quote, it is
        surrounded with single quotes, otherwise with double quotes.
        <span data-test="'single quoted string'">Span content</span>
        <span data-test='"double quoted string"'>Span content</span>
        <span data-test="string without quotes">Span content</span>
    ''', page.content)

    def test_article_metadata_key_lowercase(self):
        # Keys of metadata should be lowercase.
        page = self.read_file(path='article_with_uppercase_metadata.html')

        # Key should be lowercase
        self.assertIn('category', page.metadata, 'Key should be lowercase.')

        # Value should keep cases
        self.assertEqual('Yeah', page.metadata.get('category'))

    def test_article_with_nonconformant_meta_tags(self):
        page = self.read_file(path='article_with_nonconformant_meta_tags.html')
        expected = {
            'summary': 'Summary and stuff',
            'title': 'Article with Nonconformant HTML meta tags',
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_inline_svg(self):
        page = self.read_file(path='article_with_inline_svg.html')
        expected = {
            'title': 'Article with an inline SVG',
        }

        self.assertDictHasSubset(page.metadata, expected)