forked from varia/varia.website
many many many Varia's websites, work in progress: https://many.vvvvvvaria.org
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
1.9 KiB
64 lines
1.9 KiB
# -*- coding: utf-8 -*-
|
|
"""
|
|
Extract Table of Content
|
|
========================
|
|
|
|
A Pelican plugin to extract table of contents (ToC) from `article.content` and
|
|
place it in its own `article.toc` variable for use in templates.
|
|
"""
|
|
|
|
from os import path
|
|
from bs4 import BeautifulSoup
|
|
from pelican import signals, readers, contents
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def extract_toc(content):
|
|
if isinstance(content, contents.Static):
|
|
return
|
|
|
|
soup = BeautifulSoup(content._content, 'html.parser')
|
|
filename = content.source_path
|
|
extension = path.splitext(filename)[1][1:]
|
|
toc = None
|
|
|
|
# default Markdown reader
|
|
if not toc and readers.MarkdownReader.enabled and extension in readers.MarkdownReader.file_extensions:
|
|
toc = soup.find('div', class_='toc')
|
|
if toc:
|
|
toc.extract()
|
|
|
|
# default reStructuredText reader
|
|
if not toc and readers.RstReader.enabled and extension in readers.RstReader.file_extensions:
|
|
toc = soup.find('div', class_='contents topic')
|
|
if toc:
|
|
toc.extract()
|
|
tag = BeautifulSoup(str(toc), 'html.parser')
|
|
tag.div['class'] = 'toc'
|
|
tag.div['id'] = ''
|
|
p = tag.find('p', class_='topic-title first')
|
|
if p:
|
|
p.extract()
|
|
toc = tag
|
|
|
|
# Pandoc reader (markdown and other formats)
|
|
if 'pandoc_reader' in content.settings['PLUGINS']:
|
|
try:
|
|
from pandoc_reader import PandocReader
|
|
except ImportError:
|
|
PandocReader = False
|
|
if not toc and PandocReader and PandocReader.enabled and extension in PandocReader.file_extensions:
|
|
toc = soup.find('nav', id='TOC')
|
|
|
|
if toc:
|
|
toc.extract()
|
|
content._content = soup.decode()
|
|
content.toc = toc.decode()
|
|
if content.toc.startswith('<html>'):
|
|
content.toc = content.toc[12:-14]
|
|
|
|
|
|
def register():
|
|
signals.content_object_init.connect(extract_toc)
|
|
|