|
@ -1,8 +1,15 @@ |
|
|
#!/bin/env python3 |
|
|
#!/usr/bin/env python3 |
|
|
# fedicrawler v5 |
|
|
# fedicrawler v5 |
|
|
|
|
|
|
|
|
|
|
|
import requests, json |
|
|
|
|
|
from multiprocessing.dummy import Pool as ThreadPool |
|
|
from urllib.parse import urlparse |
|
|
from urllib.parse import urlparse |
|
|
|
|
|
|
|
|
|
|
|
# Proxy mapping keyed by URL scheme, pointing at a local SOCKS5 endpoint.
# The original literal repeated the 'http' key, so the second entry simply
# overwrote the first and no 'https' entry existed.
# NOTE(review): presumably passed to requests as `proxies=`; confirm against
# the call sites, which are outside this view.
proxies = {
    'http': 'socks5://localhost:12345',
    'https': 'socks5://localhost:12345',
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def not_gab(instance): |
|
|
def not_gab(instance): |
|
|
#gab does some weird stuff wrt enumerating subdomains |
|
|
#gab does some weird stuff wrt enumerating subdomains |
|
|
#example: epa1pu1qcxxyzcxher0u.gab.best |
|
|
#example: epa1pu1qcxxyzcxher0u.gab.best |
|
@ -54,7 +61,7 @@ def get_peers(instance): |
|
|
except Exception as e: |
|
|
except Exception as e: |
|
|
#network errors etc |
|
|
#network errors etc |
|
|
#print('fail on',instance, e) |
|
|
#print('fail on',instance, e) |
|
|
#print(e) |
|
|
print(e) |
|
|
return |
|
|
return |
|
|
|
|
|
|
|
|
def get_nodeinfo(instance): |
|
|
def get_nodeinfo(instance): |
|
@ -107,14 +114,11 @@ def get_instance_info(instance): |
|
|
|
|
|
|
|
|
def fedicrawler(): |
|
|
def fedicrawler(): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
start_url = 'https://post.lurk.org' |
|
|
start_url = 'https://post.lurk.org' |
|
|
activity = '' |
|
|
activity = '' |
|
|
peers_info ='/api/v1/instance/peers' |
|
|
peers_info ='/api/v1/instance/peers' |
|
|
|
|
|
|
|
|
proxies = { |
|
|
|
|
|
'http':'socks5://localhost:12345', |
|
|
|
|
|
'http':'socks5://localhost:12345'} |
|
|
|
|
|
|
|
|
|
|
|
pool = ThreadPool(512) |
|
|
pool = ThreadPool(512) |
|
|
|
|
|
|
|
|
filters = [not_gab, only_netloc] #what to filter out |
|
|
filters = [not_gab, only_netloc] #what to filter out |
|
|