From 2278cd9d3d032161fa49326d16edef398766a653 Mon Sep 17 00:00:00 2001 From: rra Date: Tue, 5 May 2020 16:55:59 +0200 Subject: [PATCH] about_crawler now properly uses functions from fedicrawler --- about_collector.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/about_collector.py b/about_collector.py index 03ceb2b..cf6c021 100644 --- a/about_collector.py +++ b/about_collector.py @@ -1,11 +1,11 @@ -#!/bin/env python3 +#!/usr/bin/env python3 # a tool for collecting mastodon /about/ & ToS and CoC pages. # (c) roel roscam abbing 2020 # gplv3 import os, time, json -import fedicrawler +from fedicrawler import only_netloc, multi_filter -filters = [fedicrawler.only_netloc] +filters = [only_netloc] scrape_data = json.loads(open('instance_scrape.json').read()) @@ -26,7 +26,7 @@ def find_mastodon_instances(scrape_data): if scrape_data[i]['nodeinfo']['software']['name'].lower() == 'mastodon': mastodon_instances.append(i) - #mastodon_instances = fedicrawler.multi_filter(filters,mastodon_instances) + mastodon_instances = list(multi_filter(filters,mastodon_instances)) return mastodon_instances @@ -37,7 +37,6 @@ options = Options() options.add_argument('-headless') browser = webdriver.Firefox(options=options) -#browser.set_window_size(1024, 768) # set the window size that you need for mi in find_mastodon_instances(scrape_data):