manymanymany-varia-websites/plugins/extract_toc/extract_toc.py

# -*- coding: utf-8 -*-
"""
Extract Table of Contents
=========================

A Pelican plugin to extract table of contents (ToC) from `article.content` and
place it in its own `article.toc` variable for use in templates.
"""

from os import path
from bs4 import BeautifulSoup
from pelican import signals, readers, contents
import logging

logger = logging.getLogger(__name__)


def extract_toc(content):
    """Move a rendered table of contents out of the body into ``content.toc``.

    The source file's extension is compared against each reader's
    ``file_extensions`` to decide which ToC container to look for:

    * Markdown reader: ``<div class="toc">``.
    * reStructuredText reader: ``<div class="contents topic">``; the copy
      stored on ``content.toc`` is re-labelled ``<div class="toc">`` with an
      empty ``id`` and has its ``<p class="topic-title first">`` removed.
    * Pandoc reader, only when ``'pandoc_reader'`` is listed in the
      ``PLUGINS`` setting and the module can be imported: ``<nav id="TOC">``.

    When a ToC is found it is removed from ``content._content`` and its
    markup is assigned to ``content.toc``.  When nothing is found the object
    is left untouched (no ``toc`` attribute is set).

    :param content: content object passed by the signal; instances of
        ``contents.Static`` are ignored.
    :returns: ``None``; ``content`` is modified in place.
    """
    if isinstance(content, contents.Static):
        return

    # Without markup there is nothing to extract, and without a source path
    # there is no extension to pick a reader with; bail out instead of
    # letting BeautifulSoup / splitext fail on None.
    if not content._content or not content.source_path:
        return

    soup = BeautifulSoup(content._content, 'html.parser')
    extension = path.splitext(content.source_path)[1][1:]
    toc = None

    # default Markdown reader
    markdown = readers.MarkdownReader
    if markdown.enabled and extension in markdown.file_extensions:
        toc = soup.find('div', class_='toc')

    # default reStructuredText reader
    rst = readers.RstReader
    if toc is None and rst.enabled and extension in rst.file_extensions:
        found = soup.find('div', class_='contents topic')
        if found is not None:
            found.extract()
            # Work on a re-parsed copy so the ToC can be normalised to the
            # same <div class="toc"> shape the Markdown reader produces.
            toc = BeautifulSoup(str(found), 'html.parser')
            toc.div['class'] = 'toc'
            toc.div['id'] = ''
            title = toc.find('p', class_='topic-title first')
            if title is not None:
                title.extract()

    # Pandoc reader (markdown and other formats)
    # PLUGINS may be missing or None, so fall back to an empty collection
    # rather than failing on the membership test.
    plugins = content.settings.get('PLUGINS') or ()
    if toc is None and 'pandoc_reader' in plugins:
        try:
            from pandoc_reader import PandocReader
        except ImportError:
            logger.debug(
                "pandoc_reader is listed in PLUGINS but could not be "
                "imported; skipping Pandoc ToC extraction")
        else:
            if PandocReader.enabled and extension in PandocReader.file_extensions:
                toc = soup.find('nav', id='TOC')

    if toc is None:
        return

    # Detach the ToC from the body (a no-op for the already detached
    # reStructuredText copy) before serialising both parts.
    toc.extract()
    content._content = soup.decode()

    toc_html = toc.decode()
    # Unwrap a surrounding <html><body>...</body></html> shell if present
    # (presumably added by some parser backends -- TODO confirm), so that
    # only the ToC fragment itself is exposed to templates.
    prefix, suffix = '<html><body>', '</body></html>'
    if toc_html.startswith(prefix) and toc_html.endswith(suffix):
        toc_html = toc_html[len(prefix):-len(suffix)]
    content.toc = toc_html


def register():
    """Subscribe ``extract_toc`` to the ``content_object_init`` signal."""
    init_signal = signals.content_object_init
    init_signal.connect(extract_toc)
adding the 2 submodules again 7 years ago			`# -- coding: utf-8 --`
			`"""`
			`Extract Table of Content`
			`========================`

			A Pelican plugin to extract table of contents (ToC) from `article.content` and
			place it in its own `article.toc` variable for use in templates.
			`"""`

			`from os import path`
			`from bs4 import BeautifulSoup`
			`from pelican import signals, readers, contents`
			`import logging`

			`logger = logging.getLogger(__name__)`


			`def extract_toc(content):`
			`if isinstance(content, contents.Static):`
			`return`

			`soup = BeautifulSoup(content._content, 'html.parser')`
			`filename = content.source_path`
			`extension = path.splitext(filename)[1][1:]`
			`toc = None`

			`# default Markdown reader`
			`if not toc and readers.MarkdownReader.enabled and extension in readers.MarkdownReader.file_extensions:`
			`toc = soup.find('div', class_='toc')`
			`if toc:`
			`toc.extract()`

			`# default reStructuredText reader`
			`if not toc and readers.RstReader.enabled and extension in readers.RstReader.file_extensions:`
			`toc = soup.find('div', class_='contents topic')`
			`if toc:`
			`toc.extract()`
			`tag = BeautifulSoup(str(toc), 'html.parser')`
			`tag.div['class'] = 'toc'`
			`tag.div['id'] = ''`
			`p = tag.find('p', class_='topic-title first')`
			`if p:`
			`p.extract()`
			`toc = tag`

			`# Pandoc reader (markdown and other formats)`
			`if 'pandoc_reader' in content.settings['PLUGINS']:`
			`try:`
			`from pandoc_reader import PandocReader`
			`except ImportError:`
			`PandocReader = False`
			`if not toc and PandocReader and PandocReader.enabled and extension in PandocReader.file_extensions:`
			`toc = soup.find('nav', id='TOC')`

			`if toc:`
			`toc.extract()`
			`content._content = soup.decode()`
			`content.toc = toc.decode()`
			`if content.toc.startswith('<html>'):`
			`content.toc = content.toc[12:-14]`


			`def register():`
			`signals.content_object_init.connect(extract_toc)`