hbsc_site/pelican/plugins/extract_toc/extract_toc.py


								# -*- coding: utf-8 -*-

								"""

								Extract Table of Contents

								=========================


								A Pelican plugin to extract table of contents (ToC) from `article.content` and

								place it in its own `article.toc` variable for use in templates.

								"""


								from os import path

								from bs4 import BeautifulSoup

								from pelican import signals, readers, contents

								import logging


								logger = logging.getLogger(__name__)


								def extract_toc(content):
								    """Move the table of contents out of ``content._content`` into ``content.toc``.

								    Handler for the ``content_object_init`` signal (wired up by
								    ``register``). The ToC element is looked up according to the reader
								    whose ``file_extensions`` contain the extension of
								    ``content.source_path``; the first enabled reader that yields a match
								    wins:

								    * Markdown reader: ``<div class="toc">``.
								    * reStructuredText reader: ``<div class="contents topic">``, which is
								      re-tagged as ``<div class="toc" id="">`` and stripped of its
								      ``<p class="topic-title first">`` heading.
								    * Pandoc reader, only when ``'pandoc_reader'`` is listed in the
								      ``PLUGINS`` setting and the module can be imported:
								      ``<nav id="TOC">``.

								    When a ToC is found it is removed from the content HTML, the remaining
								    markup is written back to ``content._content`` and the ToC markup is
								    stored on ``content.toc``. When nothing is found ``content`` is left
								    untouched (``content.toc`` is not set).

								    :param content: content object passed by the signal. Instances of
								        ``contents.Static`` are ignored.
								    :return: ``None``; ``content`` is modified in place.
								    """
								    if isinstance(content, contents.Static):
								        return

								    html = content._content
								    if not html:
								        # Nothing to search; also avoids handing ``None`` to BeautifulSoup.
								        return

								    soup = BeautifulSoup(html, 'html.parser')
								    # ``source_path`` may be unset on content that is not backed by a file;
								    # treat that as "no extension" so no reader matches instead of crashing.
								    extension = path.splitext(content.source_path or '')[1][1:]
								    toc = None

								    # default Markdown reader
								    markdown_reader = readers.MarkdownReader
								    if markdown_reader.enabled and extension in markdown_reader.file_extensions:
								        # Detached from ``soup`` by the final ``extract()`` below.
								        toc = soup.find('div', class_='toc')

								    # default reStructuredText reader
								    rst_reader = readers.RstReader
								    if toc is None and rst_reader.enabled and extension in rst_reader.file_extensions:
								        rst_toc = soup.find('div', class_='contents topic')
								        if rst_toc is not None:
								            rst_toc.extract()
								            # Re-parse the detached element so it can be rewritten as a
								            # standalone fragment with the same markup as the Markdown ToC.
								            tag = BeautifulSoup(str(rst_toc), 'html.parser')
								            tag.div['class'] = 'toc'
								            tag.div['id'] = ''
								            title = tag.find('p', class_='topic-title first')
								            if title is not None:
								                title.extract()
								            toc = tag

								    # Pandoc reader (markdown and other formats)
								    if toc is None:
								        # ``PLUGINS`` may be missing or ``None``; both mean "not listed".
								        plugins = content.settings.get('PLUGINS') or ()
								        if 'pandoc_reader' in plugins:
								            try:
								                from pandoc_reader import PandocReader
								            except ImportError:
								                PandocReader = None
								            if PandocReader and PandocReader.enabled and extension in PandocReader.file_extensions:
								                toc = soup.find('nav', id='TOC')

								    if toc is None:
								        return

								    # No-op for elements that were already detached above.
								    toc.extract()
								    content._content = soup.decode()

								    toc_html = toc.decode()
								    if toc_html.startswith('<html>'):
								        # 12 == len('<html><body>') and 14 == len('</body></html>').
								        # NOTE(review): presumably strips a document wrapper added by a
								        # tree-builder other than 'html.parser' -- confirm this branch is
								        # still reachable.
								        toc_html = toc_html[12:-14]
								    content.toc = toc_html


								def register():
								    """Subscribe ``extract_toc`` to the ``content_object_init`` signal."""
								    content_initialized = signals.content_object_init
								    content_initialized.connect(extract_toc)