Browse Source

minor tweaks

master
rra 10 months ago
parent
commit
27a5c9b1d7
2 changed files with 9 additions and 3 deletions
  1. +6
    -2
      about_collector.py
  2. +3
    -1
      fedicrawler.py

+ 6
- 2
about_collector.py View File

@ -41,7 +41,8 @@ browser = webdriver.Firefox(options=options)
for mi in find_mastodon_instances(scrape_data):
if not os.path.exists(os.path.join(output_dir,mi+'.png')) and not os.path.exists(os.path.join(tos_dir,mi+'.png')):
try:
print('🡓', mi)
browser.get('https://{}/about/more/'.format(mi))
page = browser.find_element_by_tag_name('html')
height=page.size['height']
@ -61,10 +62,13 @@ for mi in find_mastodon_instances(scrape_data):
print(e)
time.sleep(0.1)
print('taking screenshot of', mi)
print('📷', mi)
browser.save_screenshot(os.path.join(output_dir,mi+'.png'))
with open(os.path.join(output_dir,mi+'_about.html'),'w') as f:
f.write(browser.page_source)
print(mi,'')
except Exception as e:
print(e)
else:
print(mi, '')
#browser.save_screenshot()


+ 3
- 1
fedicrawler.py View File

@ -35,7 +35,9 @@ def only_netloc(instance):
#some peerlists return stuff like
#mastodon.social/users/blabla or
#domain.tld/friendica which are all invalid
return urlparse('https://'+instance).netloc
netloc = urlparse('https://'+instance).netloc
print(netloc)
return netloc
def multi_filter(fs, l):


Loading…
Cancel
Save