#!/usr/bin/env python3
# fediscraper v1
import json
import requests

start_url = 'https://post.lurk.org'
peers_info = '/api/v1/instance/peers'
instance_info = '/api/v1/instance'


def get_peers(instance):
    # Fetch the peer list of an instance; return None on failure.
    r = requests.get('https://' + instance + peers_info, timeout=10)
    if r.status_code == 200:
        peers = r.json()
        print(instance, 'peers with', len(peers))
        return peers
    else:
        print('fail: got {} on {}'.format(r.status_code, instance))
        return None


instances = {}
r = requests.get(start_url + peers_info, timeout=10)
if r.status_code == 200:
    print('200 for ' + start_url)
    peers = r.json()
    print('{} has {} peers'.format(start_url, len(peers)))
    for count, peer in enumerate(peers):
        try:
            peer_list = get_peers(peer)
            if peer_list:
                for i in peer_list:
                    if i not in instances:
                        # Query the instance's own metadata endpoint.
                        try:
                            ii = requests.get('https://' + i + instance_info, timeout=10)
                            info = ii.json()
                        except Exception as e:
                            print('failed to query instance info')
                            print(e)
                            info = 'error'
                        instances[i] = info
                        print('added {}, n={}'.format(i, len(instances)))
            else:
                print('no peer list for', peer)
        except Exception as e:
            print('failure for', peer)
            print(e)

# Write everything collected so far to disk.
with open('instance_scrape.json', 'w') as f:
    f.write(json.dumps(instances, indent=4))
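
# A minimal sketch of how the scraped data might be inspected afterwards
# (assumes the run above completed and instance_scrape.json exists):
#
#   with open('instance_scrape.json') as f:
#       data = json.load(f)
#   print(len(data), 'instances scraped')
#   print(sorted(data)[:10])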