#!/usr/bin/env python3
# fediscraper v2

import json
import requests
from multiprocessing.dummy import Pool as ThreadPool

start_url = 'https://post.lurk.org'
peers_info = '/api/v1/instance/peers'

# Thread pool for concurrent HTTP requests
pool = ThreadPool(256)


def get_peers(instance):
    """Return an instance's peer list, or None on failure."""
    try:
        r = requests.get('https://' + instance + '/api/v1/instance/peers',
                         timeout=1)
        if r.status_code == 200:
            peers = r.json()
            print(instance, 'peers with', len(peers))
            return peers
        print('fail: got {} on {}'.format(r.status_code, instance))
        return None
    except Exception as e:
        print('fail on', instance, e)
        return None


def get_instance_info(instance):
    """Return an instance's /api/v1/instance metadata, or an error stub."""
    instance_info = '/api/v1/instance'
    print('getting info for', instance)
    try:
        ii = requests.get('https://' + instance + instance_info, timeout=10)
        info = ii.json()
        print('info request for {} succeeded'.format(instance))
    except Exception as e:
        print('failed to query instance info')
        info = {'error': 'error'}
    return info


instances = set()

# Seed the crawl with the start instance's peer list
r = requests.get(start_url + peers_info)
if r.status_code == 200:
    start_peers = pool.map(get_peers, r.json())
    # Collect the first hop of peers, skipping instances that failed
    for peer_list in start_peers:
        if peer_list:
            instances.update(peer_list)

    # Crawl one hop further: peers of peers
    network = pool.map(get_peers, instances)
    for peer_list in network:
        if peer_list:
            for instance in peer_list:
                instances.add(instance)

# Fetch metadata for every discovered instance; freeze the set into a
# list first so the names and results stay aligned
instances_list = list(instances)
instance_info = pool.map(get_instance_info, instances_list)
scrape = dict(zip(instances_list, instance_info))

print('found {} instances'.format(len(scrape)))

pool.close()
pool.join()

with open('instance_scrape.json', 'w') as f:
    f.write(json.dumps(scrape, indent=4))