From a97430278df7dc6193f15c5d2d192e81973fc825 Mon Sep 17 00:00:00 2001 From: manetta Date: Wed, 8 Sep 2021 17:42:03 +0200 Subject: [PATCH] started with a splitter for h2 headers --- command-line/update.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/command-line/update.py b/command-line/update.py index f7c3737..55466a6 100644 --- a/command-line/update.py +++ b/command-line/update.py @@ -80,6 +80,16 @@ def download_media(html, images, wiki): return html +def split_h2_header(html): + """ + html = string (HTML) + DOES NOT WORK YET! + """ + pattern = '

.*?:.*?

' # split the h2 in two on the ":" + result = re.split(pattern, html) + # print(result[0]) + return html + def clean_up(html): """ html = string (HTML) @@ -103,6 +113,7 @@ def parse_page(pagename, wiki): images = data['parse']['images'] html = download_media(html, images, wiki) html = clean_up(html) + html = split_h2_header(html) else: html = None