Browse Source

about_crawler now properly uses functions from fedicrawler

master
rra 4 years ago
parent
commit
2278cd9d3d
  1. 9
      about_collector.py

9
about_collector.py

@@ -1,11 +1,11 @@
-#!/bin/env python3
+#!/usr/bin/env python3
 # a tool for collecting mastodon /about/ & ToS and CoC pages.
 # (c) roel roscam abbing 2020
 # gplv3
 import os, time, json
-import fedicrawler
+from fedicrawler import only_netloc, multi_filter
-filters = [fedicrawler.only_netloc]
+filters = [only_netloc]
 scrape_data = json.loads(open('instance_scrape.json').read())
@@ -26,7 +26,7 @@ def find_mastodon_instances(scrape_data):
 if scrape_data[i]['nodeinfo']['software']['name'].lower() == 'mastodon':
 mastodon_instances.append(i)
-#mastodon_instances = fedicrawler.multi_filter(filters,mastodon_instances)
+mastodon_instances = list(multi_filter(filters,mastodon_instances))
 return mastodon_instances
@@ -37,7 +37,6 @@ options = Options()
 options.add_argument('-headless')
 browser = webdriver.Firefox(options=options)
-#browser.set_window_size(1024, 768) # set the window size that you need
 for mi in find_mastodon_instances(scrape_data):

Loading…
Cancel
Save