Browse Source

made changes in file saving

master
rra 6 years ago
parent
commit
cb646ab40e
  1. 70
      fedicrawler.py

70
fedicrawler.py

@ -9,46 +9,46 @@ peers_info = '/api/v1/instance/peers'
instance_info = '/api/v1/instance'


def get_peers(instance):
    """Fetch the peer list that ``instance`` publishes.

    Sends a GET request to ``https://<instance>/api/v1/instance/peers``
    with a 10 second timeout.

    Args:
        instance: host name of the instance to query; it is concatenated
            directly into the URL, so it must not include a scheme.

    Returns:
        The decoded JSON body when the server answers with HTTP 200
        (presumably a list of peer host names -- the caller iterates it),
        otherwise ``None`` after printing the status code.

    Exceptions raised by ``requests`` (connection errors, timeouts) or by
    JSON decoding are not handled here and propagate to the caller.
    """
    response = requests.get('https://'+instance+'/api/v1/instance/peers', timeout=10)

    # Guard clause: report any non-200 answer and hand back nothing.
    if response.status_code != 200:
        print('fail: got {} on {}'.format(response.status_code, instance))
        return None

    peer_names = response.json()
    print(instance, 'peers with', len(peer_names))
    return peer_names
# Crawl outward from start_url: ask it for its peers, ask each of those peers
# for *their* peers, and store the instance info of every host found that way.
# The result maps host name -> decoded instance info (or the string 'error'
# when the info request failed) and is written to instance_scrape.json.
instances = {}

# Use the same 10 s timeout as every other request in this script so an
# unresponsive start instance cannot hang the crawl indefinitely.
r = requests.get(start_url+peers_info, timeout=10)
if r.status_code == 200:
    print('200 for '+start_url)
    peers = r.json()
    print('{} has {} peers'.format(start_url, len(peers)))
    for peer in peers:
        # Best-effort crawl: whatever goes wrong with a single peer is
        # reported and skipped so the remaining peers are still visited.
        try:
            peer_list = get_peers(peer)
            if not peer_list:
                # get_peers() returned None (it already printed the HTTP
                # status) or an empty list; there is nothing to walk.
                print('no peers returned by', peer)
                continue
            for i in peer_list:
                if i in instances:
                    # Already recorded via an earlier peer.
                    continue
                try:
                    ii = requests.get('https://'+i+instance_info, timeout=10)
                    info = ii.json()
                except Exception as e:
                    # Unreachable host, timeout or a non-JSON body: keep the
                    # host in the result, marked as failed.
                    print('failed to query instance info')
                    print(e)
                    info = 'error'
                instances[i] = info
                print('added {}, n={}'.format(i, len(instances)))
        except Exception as e:
            print('failure for', peer)
            print(e)
else:
    print('fail: got {} on {}'.format(r.status_code, start_url))

# json.dump streams straight into the file; the text is identical to
# f.write(json.dumps(instances, indent=4)).
with open('instance_scrape.json', 'w') as f:
    json.dump(instances, f, indent=4)
Loading…
Cancel
Save