# -*- coding: utf-8 -*-
"""
Extract Table of Content
========================

A Pelican plugin to extract table of contents (ToC) from `article.content`
and place it in its own `article.toc` variable for use in templates.
"""

from os import path
from bs4 import BeautifulSoup
from pelican import signals, readers, contents

import logging

logger = logging.getLogger(__name__)

# Document wrapper that some HTML serialisations put around a fragment.
# It is removed from the extracted ToC so templates receive only the ToC
# element itself.
_HTML_WRAPPER_PREFIX = '<html><body>'
_HTML_WRAPPER_SUFFIX = '</body></html>'


def extract_toc(content):
    """Move the table of contents out of ``content._content``.

    The rendered HTML in ``content._content`` is searched for the ToC
    element produced by the reader matching the source file's extension
    (Markdown, reStructuredText, or the optional ``pandoc_reader``
    plugin). When one is found it is removed from the body, the body is
    written back to ``content._content`` and the ToC markup is stored in
    ``content.toc``.

    When no ToC is found, ``content`` is left untouched and no ``toc``
    attribute is set by this function.

    :param content: content object delivered by the
        ``content_object_init`` signal; ``contents.Static`` instances
        are ignored.
    """
    if isinstance(content, contents.Static):
        return

    soup = BeautifulSoup(content._content, 'html.parser')
    filename = content.source_path
    # File extension without the leading dot, e.g. 'md' or 'rst'.
    extension = path.splitext(filename)[1][1:]
    toc = None

    # default Markdown reader
    if (readers.MarkdownReader.enabled
            and extension in readers.MarkdownReader.file_extensions):
        toc = soup.find('div', class_='toc')
        if toc is not None:
            toc.extract()

    # default reStructuredText reader
    if (toc is None
            and readers.RstReader.enabled
            and extension in readers.RstReader.file_extensions):
        toc = soup.find('div', class_='contents topic')
        if toc is not None:
            toc.extract()
            # Re-parse the fragment on its own so it can be normalised to
            # the same shape as the Markdown ToC: class 'toc', blank id,
            # and no title paragraph.
            tag = BeautifulSoup(str(toc), 'html.parser')
            tag.div['class'] = 'toc'
            tag.div['id'] = ''
            title = tag.find('p', class_='topic-title first')
            if title is not None:
                title.extract()
            toc = tag

    # Pandoc reader (markdown and other formats)
    # Tolerate a missing or None PLUGINS setting instead of raising.
    plugins = content.settings.get('PLUGINS') or ()
    if 'pandoc_reader' in plugins:
        try:
            from pandoc_reader import PandocReader
        except ImportError:
            # Plugin is listed but not importable: skip the Pandoc lookup.
            PandocReader = None
        if (toc is None
                and PandocReader
                and PandocReader.enabled
                and extension in PandocReader.file_extensions):
            toc = soup.find('nav', id='TOC')

    # Nothing to extract: leave the content as it is rather than calling
    # methods on None.
    if toc is None:
        return

    # Detaches the Pandoc ToC from the body; for the other readers the
    # element has already been removed above.
    toc.extract()
    content._content = soup.decode()

    toc_html = toc.decode()
    # Strip the document wrapper only when it is really there. Checking
    # against an empty prefix would match every string and chop valid
    # markup off both ends of the ToC.
    if (toc_html.startswith(_HTML_WRAPPER_PREFIX)
            and toc_html.endswith(_HTML_WRAPPER_SUFFIX)):
        toc_html = toc_html[len(_HTML_WRAPPER_PREFIX):-len(_HTML_WRAPPER_SUFFIX)]
    content.toc = toc_html


def register():
    """Connect ``extract_toc`` to Pelican's ``content_object_init`` signal."""
    signals.content_object_init.connect(extract_toc)