From f62bff244c652c7855ff391081438d108d2c9608 Mon Sep 17 00:00:00 2001 From: rra Date: Tue, 5 May 2020 16:49:24 +0200 Subject: [PATCH] repairs --- fedicrawler.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fedicrawler.py b/fedicrawler.py index 87f1511..de719ae 100644 --- a/fedicrawler.py +++ b/fedicrawler.py @@ -1,8 +1,15 @@ -#!/bin/env python3 +#!/usr/bin/env python3 # fedicrawler v5 +import requests, json +from multiprocessing.dummy import Pool as ThreadPool from urllib.parse import urlparse +proxies = { # one key per URL scheme; a repeated key keeps only its last value +'http':'socks5://localhost:12345', +'https':'socks5://localhost:12345'} + + def not_gab(instance): #gab does some weird stuff wrt enumerating subdomains #example: epa1pu1qcxxyzcxher0u.gab.best @@ -54,7 +61,7 @@ def get_peers(instance): except Exception as e: #network errors etc #print('fail on',instance, e) - #print(e) + print(e) return def get_nodeinfo(instance): @@ -107,14 +114,11 @@ def get_instance_info(instance): def fedicrawler(): + start_url = 'https://post.lurk.org' activity = '' peers_info ='/api/v1/instance/peers' - proxies = { - 'http':'socks5://localhost:12345', - 'http':'socks5://localhost:12345'} - pool = ThreadPool(512) filters = [not_gab, only_netloc] #what to filter out