strip html

This commit is contained in:
cellarspoon 2021-12-17 17:26:40 +01:00
parent c0b66bda7a
commit f443d1ba12
No known key found for this signature in database
GPG Key ID: 03789458B3D0C410

View File

@ -13,6 +13,33 @@ from reportlab.platypus import PageBreak, Paragraph, SimpleDocTemplate, Spacer
CWD = Path().resolve()
from html.parser import HTMLParser
from io import StringIO
class MLStripper(HTMLParser):
    """Collect the text content of an HTML fragment, discarding the markup.

    Adapted from https://stackoverflow.com/a/925630
    """

    def __init__(self):
        # HTMLParser.__init__ already calls reset(), so it is not repeated
        # here.  The legacy ``strict`` attribute is obsolete (the argument was
        # removed from HTMLParser in Python 3.5) and is no longer set.
        super().__init__(convert_charrefs=True)
        # Accumulates every chunk of character data the parser reports.
        self.text = StringIO()

    def handle_data(self, d):
        """Append a chunk of character data reported by the parser."""
        self.text.write(d)

    def get_data(self):
        """Return all text collected so far.

        The parser is closed first so that any text it is still buffering is
        delivered to ``handle_data``.  With ``convert_charrefs`` enabled,
        ``feed()`` holds back trailing text containing an ``&`` that is not
        yet followed by whitespace or ``;`` (for example ``"Q&A"``), because
        it could be the start of a character reference; without closing, that
        text would be missing from the result.
        """
        self.close()
        return self.text.getvalue()
def strip_tags(html):
    """Return the text content of ``html`` with all markup removed.

    Args:
        html: A string that may contain HTML tags and character references.

    Returns:
        The character data collected by an ``MLStripper`` fed with ``html``.
    """
    stripper = MLStripper()
    stripper.feed(html)
    # feed() may keep trailing text buffered (e.g. text ending in an
    # unterminated "&..." such as "Q&A"); close() forces it to be processed
    # so it is not lost from the result.
    stripper.close()
    return stripper.get_data()
def make_cards(filepath, db_path, side_a, side_b):
"""The main entrypoint for card generation."""
@ -26,13 +53,13 @@ def make_cards(filepath, db_path, side_a, side_b):
def select_fields(fields, content, styles, book):
if "title" in fields:
tag = "<font size=12>{}</font>".format(book.title)
ptitle = Paragraph(tag, styles["Italic"])
ptitle = Paragraph(strip_tags(tag), styles["Italic"])
content.append(ptitle)
content.append(Spacer(1, 12))
if "timestamp" in fields:
tag = "<font size=10>Timestamp: {}</font>".format(book.timestamp)
ptime = Paragraph(tag, styles["Normal"])
ptime = Paragraph(strip_tags(tag), styles["Normal"])
content.append(ptime)
content.append(Spacer(1, 12))
@ -44,7 +71,7 @@ def select_fields(fields, content, styles, book):
]
)
tag = "<font size=10>{}</font>".format(comments)
pcomments = Paragraph(tag)
pcomments = Paragraph(strip_tags(tag))
content.append(pcomments)
if "authors" in fields:
@ -52,7 +79,7 @@ def select_fields(fields, content, styles, book):
all_authors = [author.name for author in book.authors]
glued_together = format_string.format(", ".join(all_authors))
p = Paragraph(glued_together, styles["Normal"])
p = Paragraph(strip_tags(glued_together), styles["Normal"])
content.append(p)
content.append(Spacer(6, 12))
@ -61,7 +88,7 @@ def select_fields(fields, content, styles, book):
all_tags = [tag.name for tag in book.tags]
tags_glued_together = format_string.format(", ".join(all_tags))
p = Paragraph(tags_glued_together, styles["Normal"])
p = Paragraph(strip_tags(tags_glued_together), styles["Normal"])
content.append(p)
content.append(Spacer(6, 12))