|
@ -1,11 +1,11 @@ |
|
|
#!/bin/env python3 |
|
|
#!/usr/bin/env python3 |
|
|
# a tool for collecting mastodon /about/ & ToS and CoC pages. |
|
|
# a tool for collecting mastodon /about/ & ToS and CoC pages. |
|
|
# (c) roel roscam abbing 2020 |
|
|
# (c) roel roscam abbing 2020 |
|
|
# gplv3 |
|
|
# gplv3 |
|
|
import os, time, json |
|
|
import os, time, json |
|
|
import fedicrawler |
|
|
from fedicrawler import only_netloc, multi_filter |
|
|
|
|
|
|
|
|
filters = [fedicrawler.only_netloc] |
|
|
filters = [only_netloc] |
|
|
|
|
|
|
|
|
scrape_data = json.loads(open('instance_scrape.json').read()) |
|
|
scrape_data = json.loads(open('instance_scrape.json').read()) |
|
|
|
|
|
|
|
@ -26,7 +26,7 @@ def find_mastodon_instances(scrape_data): |
|
|
if scrape_data[i]['nodeinfo']['software']['name'].lower() == 'mastodon': |
|
|
if scrape_data[i]['nodeinfo']['software']['name'].lower() == 'mastodon': |
|
|
mastodon_instances.append(i) |
|
|
mastodon_instances.append(i) |
|
|
|
|
|
|
|
|
#mastodon_instances = fedicrawler.multi_filter(filters,mastodon_instances) |
|
|
mastodon_instances = list(multi_filter(filters,mastodon_instances)) |
|
|
return mastodon_instances |
|
|
return mastodon_instances |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -37,7 +37,6 @@ options = Options() |
|
|
options.add_argument('-headless') |
|
|
options.add_argument('-headless') |
|
|
|
|
|
|
|
|
browser = webdriver.Firefox(options=options) |
|
|
browser = webdriver.Firefox(options=options) |
|
|
#browser.set_window_size(1024, 768) # set the window size that you need |
|
|
|
|
|
|
|
|
|
|
|
for mi in find_mastodon_instances(scrape_data): |
|
|
for mi in find_mastodon_instances(scrape_data): |
|
|
|
|
|
|
|
|