working on making the script importable by about_collector

parent 27a6fb1a0a
commit 65ddc49057
@@ -1,20 +1,8 @@
 #!/bin/env python3
-# fedicrawler v4
-
+# fedicrawler v5
 import json, requests
 from multiprocessing.dummy import Pool as ThreadPool
 from urllib.parse import urlparse
-
-start_url = 'https://post.lurk.org'
-activity = ''
-peers_info = '/api/v1/instance/peers'
-
-proxies = {
-    'http': 'socks5://localhost:12345',
-    'https': 'socks5://localhost:12345'}
-
-pool = ThreadPool(512)
-
 def not_gab(instance):
     #gab does some weird stuff wrt enumerating subdomains
     #example: epa1pu1qcxxyzcxher0u.gab.best
@@ -117,43 +105,58 @@ def get_instance_info(instance):
         info = {'error': str(e)}
     return info
 
-filters = [not_gab, only_netloc] #what to filter out
-
-instances = set([])
-r = requests.get(start_url+peers_info) # normal brain, initial peer list
-if r.status_code == 200:
-    start_peers = pool.map(get_peers, r.json()) #expanding brain, get all peers of those initial peers
-    for i in start_peers:
-        if not i:
-            start_peers.remove(i)
-        else:
-            pool.map(instances.add, i)
-    # for i in r.json():
-    #     instances.add(i)
-    instances = set(multi_filter(filters,instances)) # apply filters before we move to network
-
-    network = pool.map(get_peers, instances) #galaxy brain, get all peers of all peers of the initial peers
-    for peer_list in network:
-        if peer_list:
-            for instance in peer_list:
-                if not_gab(instance): #prevent gab.best subdomain enumeration
-                    instances.add(instance)
-
-    instances = set(multi_filter(filters,instances))
-
-    instance_info = pool.map(get_nodeinfo, instances)
-
-    scrape = {}
-    instances_list = list(instances)
-
-    for count, value in enumerate(instances_list):
-        scrape[value] = instance_info[count]
-
-    print('found {} instances'.format(len(scrape)))
-
-    pool.close()
-    pool.join()
-
-    with open('instance_scrape.json','w') as f:
-        f.write(json.dumps(scrape,indent=4))
-        #f.write('\n'.join(text)
+def fedicrawler():
+
+    start_url = 'https://post.lurk.org'
+    activity = ''
+    peers_info = '/api/v1/instance/peers'
+
+    proxies = {
+        'http': 'socks5://localhost:12345',
+        'https': 'socks5://localhost:12345'}
+
+    pool = ThreadPool(512)
+
+    filters = [not_gab, only_netloc] #what to filter out
+
+    instances = set([])
+    r = requests.get(start_url+peers_info) # normal brain, initial peer list
+    if r.status_code == 200:
+        start_peers = pool.map(get_peers, r.json()) #expanding brain, get all peers of those initial peers
+        for i in start_peers:
+            if not i:
+                start_peers.remove(i)
+            else:
+                pool.map(instances.add, i)
+        # for i in r.json():
+        #     instances.add(i)
+        instances = set(multi_filter(filters,instances)) # apply filters before we move to network
+
+        network = pool.map(get_peers, instances) #galaxy brain, get all peers of all peers of the initial peers
+        for peer_list in network:
+            if peer_list:
+                for instance in peer_list:
+                    if not_gab(instance): #prevent gab.best subdomain enumeration
+                        instances.add(instance)
+
+        instances = set(multi_filter(filters,instances))
+
+        instance_info = pool.map(get_nodeinfo, instances)
+
+        scrape = {}
+        instances_list = list(instances)
+
+        for count, value in enumerate(instances_list):
+            scrape[value] = instance_info[count]
+
+        print('found {} instances'.format(len(scrape)))
+
+        pool.close()
+        pool.join()
+
+        with open('instance_scrape.json','w') as f:
+            f.write(json.dumps(scrape,indent=4))
+            #f.write('\n'.join(text)
+
+if __name__ == '__main__':
+    fedicrawler()
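
With the crawl wrapped in fedicrawler() behind the new "if __name__ == '__main__':" guard, importing the module only defines functions; the thread pool, the network requests and the write to instance_scrape.json now happen only when the function is called. A minimal sketch of the importing side, assuming the script is importable as fedicrawler and that about_collector simply calls the function directly (the module name and call site are assumptions, not part of this commit):

    import fedicrawler   # import alone no longer starts a crawl

    fedicrawler.fedicrawler()   # run the crawl explicitly; writes instance_scrape.json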