started with a splitter for h2 headers

2021-09-08 17:42:03 +02:00 · 2021-09-08 17:42:03 +02:00 · a97430278d
commit a97430278d
parent 1a0c35dc03
1 changed files with 11 additions and 0 deletions
--- a/command-line/update.py
+++ b/command-line/update.py
@ -80,6 +80,16 @@ def download_media(html, images, wiki):
 	return html
 def split_h2_header(html):
 	"""
 		html = string (HTML)
 		Placeholder for splitting h2 headlines in two on the ":".
 		NOT FUNCTIONAL YET: the regex split below is evaluated, but its
 		outcome is discarded and the input is handed back unchanged.
 	"""
 	# an h2 holding a span with class "mw-headline" whose content has a ":" in it
 	h2_with_colon = '<h2><span class="mw-headline" id=".*?">.*?:.*?</h2>'
 	# the pieces around each matching headline; nothing consumes them so far
 	_pieces = re.split(h2_with_colon, html)
 	return html
 def clean_up(html):
 	"""
 		html = string (HTML)
@ -103,6 +113,7 @@ def parse_page(pagename, wiki):
 		images = data['parse']['images']
 		html = download_media(html, images, wiki)
 		html = clean_up(html)
 		html = split_h2_header(html)
 	else: 
 		html = None