903 lines
35 KiB
Python
903 lines
35 KiB
Python
import os
|
|
from unittest.mock import patch
|
|
|
|
from pelican import readers
|
|
from pelican.tests.support import get_settings, unittest
|
|
from pelican.utils import SafeDatetime
|
|
|
|
|
|
# Directory that contains this test module.
CUR_DIR = os.path.dirname(__file__)
# Root of the sample content tree, located next to this module.
CONTENT_PATH = os.path.join(CUR_DIR, 'content')
|
|
|
|
|
|
def _path(*args):
    """Return a path below ``CONTENT_PATH``.

    The positional ``args`` are appended to ``CONTENT_PATH`` as successive
    path components.
    """
    components = (CONTENT_PATH,) + args
    return os.path.join(*components)
|
|
|
|
|
|
class ReaderTest(unittest.TestCase):
    """Common base class for the reader test cases in this module."""

    def read_file(self, path, **kwargs):
        """Read ``path`` (relative to ``CONTENT_PATH``) with ``readers.Readers``.

        Keyword arguments are passed to ``get_settings`` to build the
        settings the ``Readers`` instance is created with.
        """
        # Isolate from future API changes to readers.read_file
        settings = get_settings(**kwargs)
        return readers.Readers(settings=settings).read_file(
            base_path=CONTENT_PATH, path=path)

    def assertDictHasSubset(self, dictionary, subset):
        """Assert that each key/value pair of ``subset`` is in ``dictionary``.

        Fails on the first key that is missing or whose value differs.
        """
        for key, value in subset.items():
            if key not in dictionary:
                self.fail(
                    'Expected %s to have value %s, but was not in Dict' %
                    (key, value))
            real_value = dictionary.get(key)
            self.assertEqual(
                value,
                real_value,
                'Expected %s to have value %s, but was %s' %
                (key, value, real_value))
|
|
|
|
|
|
class TestAssertDictHasSubset(ReaderTest):
    """Tests for the ``assertDictHasSubset`` helper itself."""

    def setUp(self):
        # Two-entry fixture shared by every test in this case.
        self.dictionary = {
            'key-a': 'val-a',
            'key-b': 'val-b',
        }

    def tearDown(self):
        self.dictionary = None

    def test_subset(self):
        # A strict subset of the fixture is accepted.
        subset = {'key-a': 'val-a'}
        self.assertDictHasSubset(self.dictionary, subset)

    def test_equal(self):
        # The fixture is accepted as a subset of itself.
        self.assertDictHasSubset(self.dictionary, self.dictionary)

    def test_fail_not_set(self):
        # A key absent from the fixture produces the "not in Dict" failure.
        pattern = r'Expected.*key-c.*to have value.*val-c.*but was not in Dict'
        with self.assertRaisesRegex(AssertionError, pattern):
            self.assertDictHasSubset(self.dictionary, {'key-c': 'val-c'})

    def test_fail_wrong_val(self):
        # A present key with a different value produces the "but was" failure.
        pattern = (
            r'Expected .*key-a.* to have value .*val-b.* but was .*val-a.*')
        with self.assertRaisesRegex(AssertionError, pattern):
            self.assertDictHasSubset(self.dictionary, {'key-a': 'val-b'})
|
|
|
|
|
|
class DefaultReaderTest(ReaderTest):
    """Tests for unknown extensions, DEFAULT_DATE='fs' and empty alt checks."""

    @staticmethod
    def _fs_mtime(name):
        """Return the mtime of content fixture ``name`` as a SafeDatetime."""
        return SafeDatetime.fromtimestamp(os.stat(_path(name)).st_mtime)

    def test_readfile_unknown_extension(self):
        # Reading a file with an unrecognised extension must raise TypeError.
        self.assertRaises(
            TypeError,
            self.read_file,
            path='article_with_metadata.unknownextension')

    def test_readfile_path_metadata_implicit_dates(self):
        # Neither date is expected from the file: both equal the file mtime.
        fixture = 'article_with_metadata_implicit_dates.html'
        article = self.read_file(path=fixture, DEFAULT_DATE='fs')
        wanted = {
            'date': self._fs_mtime(fixture),
            'modified': self._fs_mtime(fixture),
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_readfile_path_metadata_explicit_dates(self):
        # Both dates are expected to come from the file, not the filesystem.
        fixture = 'article_with_metadata_explicit_dates.html'
        article = self.read_file(path=fixture, DEFAULT_DATE='fs')
        wanted = {
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 31, 23, 59),
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_readfile_path_metadata_implicit_date_explicit_modified(self):
        # 'date' is expected from the mtime, 'modified' as a fixed value.
        fixture = 'article_with_metadata_implicit_date_explicit_modified.html'
        article = self.read_file(path=fixture, DEFAULT_DATE='fs')
        wanted = {
            'date': self._fs_mtime(fixture),
            'modified': SafeDatetime(2010, 12, 2, 10, 14),
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_readfile_path_metadata_explicit_date_implicit_modified(self):
        # 'date' is expected as a fixed value, 'modified' from the mtime.
        fixture = 'article_with_metadata_explicit_date_implicit_modified.html'
        article = self.read_file(path=fixture, DEFAULT_DATE='fs')
        wanted = {
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': self._fs_mtime(fixture),
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_find_empty_alt(self):
        # The warning must be logged wherever the empty alt attribute sits
        # inside the tag.
        tags = (
            '<img alt="" src="test-image.png" width="300px" />',
            '<img src="test-image.png" width="300px" alt="" />',
        )
        limit_note = {'limit_msg': 'Other images have empty alt attributes'}

        with patch('pelican.readers.logger') as log_mock:
            for tag in tags:
                readers.find_empty_alt(tag, '/test/path')
                log_mock.warning.assert_called_with(
                    'Empty alt attribute for image %s in %s',
                    'test-image.png',
                    '/test/path',
                    extra=limit_note)
|
|
|
|
|
|
class RstReaderTest(ReaderTest):
    """Tests that read the ``.rst`` content fixtures.

    The typogrify-dependent tests call ``self.skipTest`` when typogrify
    cannot be imported (or is too old), so they are reported as skipped
    rather than silently passing.
    """

    def _assert_standard_article_metadata(self, path):
        """Read ``path`` and check the metadata shared by the sample articles."""
        page = self.read_file(path=path)
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'This is a super article !',
            'summary': '<p class="first last">Multi-line metadata should be'
                       ' supported\nas well as <strong>inline'
                       ' markup</strong> and stuff to "typogrify'
                       '"...</p>\n',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 2, 10, 20),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_metadata(self):
        self._assert_standard_article_metadata('article_with_metadata.rst')

    def test_article_with_capitalized_metadata(self):
        # Same expectations as the plain fixture.
        self._assert_standard_article_metadata(
            'article_with_capitalized_metadata.rst')

    def test_article_with_filename_metadata(self):
        source = '2012-11-29_rst_w_filename_meta#foo-bar.rst'

        # No filename pattern: only metadata from the file body is checked.
        page = self.read_file(path=source, FILENAME_METADATA=None)
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

        # Date-only pattern: the date is taken from the filename.
        page = self.read_file(
            path=source,
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2}).*')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

        # Full pattern: group names are expected lower-cased as keys, and
        # the author from the file body is expected to be kept.
        page = self.read_file(
            path=source,
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<author>.*)'))
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'slug': 'rst_w_filename_meta',
            'mymeta': 'foo',
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_optional_filename_metadata(self):
        optional_date = r'(?P<date>\d{4}-\d{2}-\d{2})?'

        page = self.read_file(
            path='2012-11-29_rst_w_filename_meta#foo-bar.rst',
            FILENAME_METADATA=optional_date)
        expected = {
            'date': SafeDatetime(2012, 11, 29),
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)

        # The optional group does not match here, so no date may be set.
        page = self.read_file(
            path='article.rst',
            FILENAME_METADATA=optional_date)
        expected = {
            'reader': 'rst',
        }
        self.assertDictHasSubset(page.metadata, expected)
        self.assertNotIn('date', page.metadata, 'Date should not be set.')

    def test_article_metadata_key_lowercase(self):
        # Keys of metadata should be lowercase.
        reader = readers.RstReader(settings=get_settings())
        content, metadata = reader.read(
            _path('article_with_uppercase_metadata.rst'))

        self.assertIn('category', metadata, 'Key should be lowercase.')
        self.assertEqual('Yeah', metadata.get('category'),
                         'Value keeps case.')

    def test_article_extra_path_metadata(self):
        input_with_metadata = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
        page_metadata = self.read_file(
            path=input_with_metadata,
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<author>.*)'
            ),
            EXTRA_PATH_METADATA={
                input_with_metadata: {
                    'key-1a': 'value-1a',
                    'key-1b': 'value-1b'
                }
            }
        )
        expected_metadata = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'slug': 'rst_w_filename_meta',
            'mymeta': 'foo',
            'reader': 'rst',
            'key-1a': 'value-1a',
            'key-1b': 'value-1b'
        }
        self.assertDictHasSubset(page_metadata.metadata, expected_metadata)

        input_file_path_without_metadata = 'article.rst'
        page_without_metadata = self.read_file(
            path=input_file_path_without_metadata,
            EXTRA_PATH_METADATA={
                input_file_path_without_metadata: {
                    'author': 'Charlès Overwrite'
                }
            }
        )
        expected_without_metadata = {
            'category': 'misc',
            'author': 'Charlès Overwrite',
            'title': 'Article title',
            'reader': 'rst',
        }
        self.assertDictHasSubset(
            page_without_metadata.metadata,
            expected_without_metadata)

    def test_article_extra_path_metadata_dont_overwrite(self):
        # EXTRA_PATH_METADATA['author'] should get ignored
        # since we don't overwrite already set values
        input_file_path = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
        page = self.read_file(
            path=input_file_path,
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<orginalauthor>.*)'
            ),
            EXTRA_PATH_METADATA={
                input_file_path: {
                    'author': 'Charlès Overwrite',
                    'key-1b': 'value-1b'
                }
            }
        )
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'slug': 'rst_w_filename_meta',
            'mymeta': 'foo',
            'reader': 'rst',
            'key-1b': 'value-1b'
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_article_extra_path_metadata_recurse(self):
        parent = "TestCategory"
        notparent = "TestCategory/article"
        path = "TestCategory/article_without_category.rst"

        epm = {
            parent: {'epmr_inherit': parent,
                     'epmr_override': parent, },
            notparent: {'epmr_bogus': notparent},
            path: {'epmr_override': path, },
        }
        expected_metadata = {
            'epmr_inherit': parent,
            'epmr_override': path,
        }

        page = self.read_file(path=path, EXTRA_PATH_METADATA=epm)
        self.assertDictHasSubset(page.metadata, expected_metadata)

        # Make sure vars aren't getting "inherited" by mistake...
        path = "article.rst"
        page = self.read_file(path=path, EXTRA_PATH_METADATA=epm)
        for k in expected_metadata:
            self.assertNotIn(k, page.metadata)

        # Same, but for edge cases where one file's name is a prefix of
        # another.
        path = "TestCategory/article_without_category.rst"
        page = self.read_file(path=path, EXTRA_PATH_METADATA=epm)
        for k in epm[notparent]:
            self.assertNotIn(k, page.metadata)

    def test_typogrify(self):
        # if nothing is specified in the settings, the content should be
        # unmodified
        page = self.read_file(path='article.rst')
        expected = ('<p>THIS is some content. With some stuff to '
                    '"typogrify"...</p>\n<p>Now with added '
                    'support for <abbr title="three letter acronym">'
                    'TLA</abbr>.</p>\n')

        self.assertEqual(page.content, expected)

        try:
            # otherwise, typogrify should be applied
            page = self.read_file(path='article.rst', TYPOGRIFY=True)
            expected = (
                '<p><span class="caps">THIS</span> is some content. '
                'With some stuff to “typogrify”…</p>\n'
                '<p>Now with added support for <abbr title="three letter '
                'acronym"><span class="caps">TLA</span></abbr>.</p>\n')

            self.assertEqual(page.content, expected)
        except ImportError:
            # skipTest raises SkipTest, so the runner records a skip.
            # Returning unittest.skip(...) only built an unused decorator
            # and the test was reported as passed.
            self.skipTest('need the typogrify distribution')

    def test_typogrify_summary(self):
        # if nothing is specified in the settings, the summary should be
        # unmodified
        page = self.read_file(path='article_with_metadata.rst')
        expected = ('<p class="first last">Multi-line metadata should be'
                    ' supported\nas well as <strong>inline'
                    ' markup</strong> and stuff to "typogrify'
                    '"...</p>\n')

        self.assertEqual(page.metadata['summary'], expected)

        try:
            # otherwise, typogrify should be applied
            page = self.read_file(path='article_with_metadata.rst',
                                  TYPOGRIFY=True)
            expected = ('<p class="first last">Multi-line metadata should be'
                        ' supported\nas well as <strong>inline'
                        ' markup</strong> and stuff to “typogrify'
                        '”…</p>\n')

            self.assertEqual(page.metadata['summary'], expected)
        except ImportError:
            # Record a real skip instead of returning an unused decorator.
            self.skipTest('need the typogrify distribution')

    def test_typogrify_ignore_tags(self):
        try:
            # typogrify should be able to ignore user specified tags,
            # but tries to be clever with widont extension
            page = self.read_file(path='article.rst', TYPOGRIFY=True,
                                  TYPOGRIFY_IGNORE_TAGS=['p'])
            expected = ('<p>THIS is some content. With some stuff to '
                        '"typogrify"...</p>\n<p>Now with added '
                        'support for <abbr title="three letter acronym">'
                        'TLA</abbr>.</p>\n')

            self.assertEqual(page.content, expected)

            # typogrify should ignore code blocks by default because
            # code blocks are composed inside the pre tag
            page = self.read_file(path='article_with_code_block.rst',
                                  TYPOGRIFY=True)

            expected = ('<p>An article with some code</p>\n'
                        '<div class="highlight"><pre><span></span>'
                        '<span class="n">x</span>'
                        ' <span class="o">&</span>'
                        ' <span class="n">y</span>\n</pre></div>\n'
                        '<p>A block quote:</p>\n<blockquote>\nx '
                        '<span class="amp">&</span> y</blockquote>\n'
                        '<p>Normal:\nx'
                        ' <span class="amp">&</span>'
                        ' y'
                        '</p>\n')

            self.assertEqual(page.content, expected)

            # instruct typogrify to also ignore blockquotes
            page = self.read_file(path='article_with_code_block.rst',
                                  TYPOGRIFY=True,
                                  TYPOGRIFY_IGNORE_TAGS=['blockquote'])

            expected = ('<p>An article with some code</p>\n'
                        '<div class="highlight"><pre><span>'
                        '</span><span class="n">x</span>'
                        ' <span class="o">&</span>'
                        ' <span class="n">y</span>\n</pre></div>\n'
                        '<p>A block quote:</p>\n<blockquote>\nx '
                        '& y</blockquote>\n'
                        '<p>Normal:\nx'
                        ' <span class="amp">&</span>'
                        ' y'
                        '</p>\n')

            self.assertEqual(page.content, expected)
        except ImportError:
            # Record a real skip instead of returning an unused decorator.
            self.skipTest('need the typogrify distribution')
        except TypeError:
            self.skipTest('need typogrify version 2.0.4 or later')

    def test_article_with_multiple_authors(self):
        page = self.read_file(path='article_with_multiple_authors.rst')
        expected = {
            'authors': ['First Author', 'Second Author']
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_multiple_authors_semicolon(self):
        page = self.read_file(
            path='article_with_multiple_authors_semicolon.rst')
        expected = {
            'authors': ['Author, First', 'Author, Second']
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_article_with_multiple_authors_list(self):
        page = self.read_file(path='article_with_multiple_authors_list.rst')
        expected = {
            'authors': ['Author, First', 'Author, Second']
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_default_date_formats(self):
        # A tuple and an ISO string DEFAULT_DATE must yield the same date.
        tuple_date = self.read_file(path='article.rst',
                                    DEFAULT_DATE=(2012, 5, 1))
        string_date = self.read_file(path='article.rst',
                                     DEFAULT_DATE='2012-05-01')

        self.assertEqual(tuple_date.metadata['date'],
                         string_date.metadata['date'])

    def test_parse_error(self):
        # Verify that it raises an Exception, not nothing and not SystemExit or
        # some such
        with self.assertRaisesRegex(Exception, "underline too short"):
            self.read_file(path='../parse_error/parse_error.rst')

    def test_typogrify_dashes_config(self):
        # (TYPOGRIFY_DASHES value, expected content, expected title), checked
        # in order: default config, 'oldschool', then 'oldschool_inverted'.
        cases = (
            ('default',
             "<p>One: -; Two: —; Three: —-</p>\n",
             "One -, two —, three —- dashes!"),
            ('oldschool',
             "<p>One: -; Two: –; Three: —</p>\n",
             "One -, two –, three — dashes!"),
            ('oldschool_inverted',
             "<p>One: -; Two: —; Three: –</p>\n",
             "One -, two —, three – dashes!"),
        )
        for variant, expected, expected_title in cases:
            page = self.read_file(
                path='article_with_typogrify_dashes.rst',
                TYPOGRIFY=True,
                TYPOGRIFY_DASHES=variant)

            self.assertEqual(page.content, expected)
            self.assertEqual(page.title, expected_title)
|
|
|
|
|
|
@unittest.skipUnless(readers.Markdown, "markdown isn't installed")
class MdReaderTest(ReaderTest):
    """Tests that read the Markdown content fixtures.

    The whole case is skipped when ``readers.Markdown`` is falsy.
    """

    def test_article_with_metadata(self):
        md_reader = readers.MarkdownReader(settings=get_settings())

        _, meta = md_reader.read(_path('article_with_md_extension.md'))
        wanted = {
            'category': 'test',
            'title': 'Test md File',
            'summary': '<p>I have a lot to test</p>',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 2, 10, 20),
            'tags': ['foo', 'bar', 'foobar'],
        }
        self.assertDictHasSubset(meta, wanted)

        # Second fixture: non-ASCII title, summary, category and tags.
        _, meta = md_reader.read(
            _path('article_with_markdown_and_nonascii_summary.md'))
        wanted = {
            'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定',
            'summary': '<p>パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。</p>',
            'category': '指導書',
            'date': SafeDatetime(2012, 12, 20),
            'modified': SafeDatetime(2012, 12, 22),
            'tags': ['パイソン', 'マック'],
            'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8',
        }
        self.assertDictHasSubset(meta, wanted)

    def test_article_with_footnote(self):
        # Configure the footnotes extension with '-' as SEPARATOR before
        # the reader is built.
        settings = get_settings()
        extension_configs = settings['MARKDOWN']['extension_configs']
        extension_configs['markdown.extensions.footnotes'] = {
            'SEPARATOR': '-'}
        md_reader = readers.MarkdownReader(settings)
        body, meta = md_reader.read(
            _path('article_with_markdown_and_footnote.md'))
        wanted_body = (
            '<p>This is some content'
            '<sup id="fnref-1"><a class="footnote-ref" href="#fn-1"'
            '>1</a></sup>'
            ' with some footnotes'
            '<sup id="fnref-footnote"><a class="footnote-ref" '
            'href="#fn-footnote">2</a></sup></p>\n'

            '<div class="footnote">\n'
            '<hr>\n<ol>\n<li id="fn-1">\n'
            '<p>Numbered footnote '
            '<a class="footnote-backref" href="#fnref-1" '
            'title="Jump back to footnote 1 in the text">↩</a></p>\n'
            '</li>\n<li id="fn-footnote">\n'
            '<p>Named footnote '
            '<a class="footnote-backref" href="#fnref-footnote"'
            ' title="Jump back to footnote 2 in the text">↩</a></p>\n'
            '</li>\n</ol>\n</div>')
        wanted_meta = {
            'title': 'Article with markdown containing footnotes',
            'summary': (
                '<p>Summary with <strong>inline</strong> markup '
                '<em>should</em> be supported.</p>'),
            'date': SafeDatetime(2012, 10, 31),
            'modified': SafeDatetime(2012, 11, 1),
            'multiline': [
                'Line Metadata should be handle properly.',
                'See syntax of Meta-Data extension of '
                'Python Markdown package:',
                'If a line is indented by 4 or more spaces,',
                'that line is assumed to be an additional line of the value',
                'for the previous keyword.',
                'A keyword may have as many lines as desired.',
            ],
        }
        self.assertEqual(body, wanted_body)
        self.assertDictHasSubset(meta, wanted_meta)

    def test_article_with_file_extensions(self):
        md_reader = readers.MarkdownReader(settings=get_settings())
        # Each entry checks that one Markdown file extension (md, mkd,
        # markdown, mdown) is processed by the correct reader.
        cases = (
            ('article_with_md_extension.md',
             "<h1>Test Markdown File Header</h1>\n"
             "<h2>Used for pelican test</h2>\n"
             "<p>The quick brown fox jumped over the lazy dog's back.</p>"),
            ('article_with_mkd_extension.mkd',
             "<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
             " test</h2>\n<p>This is another markdown test file. Uses"
             " the mkd extension.</p>"),
            ('article_with_markdown_extension.markdown',
             "<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
             " test</h2>\n<p>This is another markdown test file. Uses"
             " the markdown extension.</p>"),
            ('article_with_mdown_extension.mdown',
             "<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
             " test</h2>\n<p>This is another markdown test file. Uses"
             " the mdown extension.</p>"),
        )
        for filename, wanted in cases:
            body, _ = md_reader.read(_path(filename))
            self.assertEqual(body, wanted)

    def test_article_with_markdown_markup_extension(self):
        # test to ensure the markdown markup extension is being processed as
        # expected
        markdown_settings = {
            'extension_configs': {
                'markdown.extensions.toc': {},
                'markdown.extensions.codehilite': {},
                'markdown.extensions.extra': {},
            },
        }
        article = self.read_file(
            path='article_with_markdown_markup_extensions.md',
            MARKDOWN=markdown_settings)
        wanted = ('<div class="toc">\n'
                  '<ul>\n'
                  '<li><a href="#level1">Level1</a><ul>\n'
                  '<li><a href="#level2">Level2</a></li>\n'
                  '</ul>\n'
                  '</li>\n'
                  '</ul>\n'
                  '</div>\n'
                  '<h2 id="level1">Level1</h2>\n'
                  '<h3 id="level2">Level2</h3>')

        self.assertEqual(article.content, wanted)

    def test_article_with_filename_metadata(self):
        source = '2012-11-30_md_w_filename_meta#foo-bar.md'

        # No filename pattern at all.
        article = self.read_file(path=source, FILENAME_METADATA=None)
        wanted = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
        }
        self.assertDictHasSubset(article.metadata, wanted)

        # Pattern that only captures the date.
        article = self.read_file(
            path=source,
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2}).*')
        wanted = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'date': SafeDatetime(2012, 11, 30),
        }
        self.assertDictHasSubset(article.metadata, wanted)

        # Pattern that captures date, slug, a custom key and an author.
        article = self.read_file(
            path=source,
            FILENAME_METADATA=(
                r'(?P<date>\d{4}-\d{2}-\d{2})'
                r'_(?P<Slug>.*)'
                r'#(?P<MyMeta>.*)-(?P<author>.*)'))
        wanted = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'date': SafeDatetime(2012, 11, 30),
            'slug': 'md_w_filename_meta',
            'mymeta': 'foo',
        }
        self.assertDictHasSubset(article.metadata, wanted)

    def test_article_with_optional_filename_metadata(self):
        optional_date = r'(?P<date>\d{4}-\d{2}-\d{2})?'

        article = self.read_file(
            path='2012-11-30_md_w_filename_meta#foo-bar.md',
            FILENAME_METADATA=optional_date)
        wanted = {
            'date': SafeDatetime(2012, 11, 30),
            'reader': 'markdown',
        }
        self.assertDictHasSubset(article.metadata, wanted)

        # This filename carries no date, so none may be set.
        article = self.read_file(
            path='empty.md',
            FILENAME_METADATA=optional_date)
        wanted = {
            'reader': 'markdown',
        }
        self.assertDictHasSubset(article.metadata, wanted)
        self.assertNotIn('date', article.metadata, 'Date should not be set.')

    def test_duplicate_tags_or_authors_are_removed(self):
        md_reader = readers.MarkdownReader(settings=get_settings())
        _, meta = md_reader.read(
            _path('article_with_duplicate_tags_authors.md'))
        wanted = {
            'tags': ['foo', 'bar', 'foobar'],
            'authors': ['Author, First', 'Author, Second'],
        }
        self.assertDictHasSubset(meta, wanted)

    def test_metadata_not_parsed_for_metadata(self):
        settings = get_settings()
        settings['FORMATTED_FIELDS'] = ['summary']

        md_reader = readers.MarkdownReader(settings=settings)
        _, meta = md_reader.read(
            _path('article_with_markdown_and_nested_metadata.md'))
        wanted = {
            'title': 'Article with markdown and nested summary metadata',
            'summary': '<p>Test: This metadata value looks like metadata</p>',
        }
        self.assertDictHasSubset(meta, wanted)

    def test_empty_file(self):
        # An empty file yields no metadata and empty content.
        md_reader = readers.MarkdownReader(settings=get_settings())
        body, meta = md_reader.read(_path('empty.md'))

        self.assertEqual(meta, {})
        self.assertEqual(body, '')

    def test_empty_file_with_bom(self):
        # Same expectations as the empty file, for the BOM fixture.
        md_reader = readers.MarkdownReader(settings=get_settings())
        body, meta = md_reader.read(_path('empty_with_bom.md'))

        self.assertEqual(meta, {})
        self.assertEqual(body, '')

    def test_typogrify_dashes_config(self):
        # (TYPOGRIFY_DASHES value, expected content, expected title), checked
        # in order: default config, 'oldschool', then 'oldschool_inverted'.
        cases = (
            ('default',
             "<p>One: -; Two: —; Three: —-</p>",
             "One -, two —, three —- dashes!"),
            ('oldschool',
             "<p>One: -; Two: –; Three: —</p>",
             "One -, two –, three — dashes!"),
            ('oldschool_inverted',
             "<p>One: -; Two: —; Three: –</p>",
             "One -, two —, three – dashes!"),
        )
        for variant, wanted, wanted_title in cases:
            article = self.read_file(
                path='article_with_typogrify_dashes.md',
                TYPOGRIFY=True,
                TYPOGRIFY_DASHES=variant)

            self.assertEqual(article.content, wanted)
            self.assertEqual(article.title, wanted_title)
|
|
|
|
|
|
class HTMLReaderTest(ReaderTest):
    """Tests that read the ``.html`` content fixtures."""

    # NOTE(review): the expected multi-line bodies below are written as
    # explicit "\n"-joined literals. Any leading or repeated whitespace the
    # original triple-quoted literals carried is not visible in the
    # flattened source - confirm against the fixtures.

    def test_article_with_comments(self):
        article = self.read_file(path='article_with_comments.html')
        wanted = (
            '\n'
            'Body content\n'
            '<!-- This comment is included (including extra whitespace) -->\n'
        )

        self.assertEqual(wanted, article.content)

    def test_article_with_keywords(self):
        article = self.read_file(path='article_with_keywords.html')
        wanted = {
            'tags': ['foo', 'bar', 'foobar'],
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_article_with_metadata(self):
        article = self.read_file(path='article_with_metadata.html')
        wanted = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'This is a super article !',
            'summary': 'Summary and stuff',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_article_with_multiple_similar_metadata_tags(self):
        # Repeated 'custom_field' values are expected as a list.
        article = self.read_file(
            path='article_with_multiple_metadata_tags.html')
        wanted = {
            'custom_field': ['https://getpelican.com', 'https://www.eff.org'],
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_article_with_multiple_authors(self):
        article = self.read_file(path='article_with_multiple_authors.html')
        wanted = {
            'authors': ['First Author', 'Second Author'],
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_article_with_metadata_and_contents_attrib(self):
        article = self.read_file(
            path='article_with_metadata_and_contents.html')
        wanted = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'This is a super article !',
            'summary': 'Summary and stuff',
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }
        self.assertDictHasSubset(article.metadata, wanted)

    def test_article_with_null_attributes(self):
        article = self.read_file(path='article_with_null_attributes.html')
        wanted = (
            '\n'
            'Ensure that empty attributes are copied properly.\n'
            '<input name="test" disabled style="" />\n'
        )

        self.assertEqual(wanted, article.content)

    def test_article_with_attributes_containing_double_quotes(self):
        article = self.read_file(path='article_with_attributes_containing_' +
                                      'double_quotes.html')
        wanted = (
            '\n'
            'Ensure that if an attribute value contains a double quote, it is\n'
            'surrounded with single quotes, otherwise with double quotes.\n'
            '<span data-test="\'single quoted string\'">Span content</span>\n'
            '<span data-test=\'"double quoted string"\'>Span content</span>\n'
            '<span data-test="string without quotes">Span content</span>\n'
        )
        self.assertEqual(wanted, article.content)

    def test_article_metadata_key_lowercase(self):
        # Keys of metadata should be lowercase.
        article = self.read_file(path='article_with_uppercase_metadata.html')
        meta = article.metadata

        # Key should be lowercase
        self.assertIn('category', meta, 'Key should be lowercase.')

        # Value should keep cases
        self.assertEqual('Yeah', meta.get('category'))

    def test_article_with_nonconformant_meta_tags(self):
        article = self.read_file(
            path='article_with_nonconformant_meta_tags.html')
        wanted = {
            'summary': 'Summary and stuff',
            'title': 'Article with Nonconformant HTML meta tags',
        }

        self.assertDictHasSubset(article.metadata, wanted)

    def test_article_with_inline_svg(self):
        article = self.read_file(path='article_with_inline_svg.html')
        wanted = {
            'title': 'Article with an inline SVG',
        }
        self.assertDictHasSubset(article.metadata, wanted)
|