diff --git a/fedicrawler.py b/fedicrawler.py index 87f1511..de719ae 100644 --- a/fedicrawler.py +++ b/fedicrawler.py @@ -1,8 +1,15 @@ -#!/bin/env python3 +#!/usr/bin/env python3 # fedicrawler v5 +import requests, json +from multiprocessing.dummy import Pool as ThreadPool from urllib.parse import urlparse +proxies = { +'http':'socks5://localhost:12345', +'https':'socks5://localhost:12345'} + + def not_gab(instance): #gab does some weird stuff wrt enumerating subdomains #example: epa1pu1qcxxyzcxher0u.gab.best @@ -54,7 +61,7 @@ def get_peers(instance): except Exception as e: #network errors etc #print('fail on',instance, e) - #print(e) + print(e) return def get_nodeinfo(instance): @@ -107,14 +114,11 @@ def get_instance_info(instance): def fedicrawler(): + start_url = 'https://post.lurk.org' activity = '' peers_info ='/api/v1/instance/peers' - proxies = { - 'http':'socks5://localhost:12345', - 'http':'socks5://localhost:12345'} - pool = ThreadPool(512) filters = [not_gab, only_netloc] #what to filter out