From cb646ab40e317e9d2e49e309ee11af6efe0a8ed7 Mon Sep 17 00:00:00 2001
From: rra
Date: Wed, 30 May 2018 13:04:56 +0200
Subject: [PATCH] made changes in file saving

---
 fedicrawler.py | 70 +++++++++++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/fedicrawler.py b/fedicrawler.py
index 983cb89..a6ec2a1 100644
--- a/fedicrawler.py
+++ b/fedicrawler.py
@@ -9,46 +9,46 @@ peers_info = '/api/v1/instance/peers'
 instance_info = '/api/v1/instance'
 
 def get_peers(instance):
-	r = requests.get('https://'+instance+'/api/v1/instance/peers', timeout=10)
-	if r.status_code == 200:
-		peers = r.json()
-		print(instance, 'peers with', len(peers))
-		return(peers)
-	else:
-		print('fail: got {} on {}'.format(r.status_code, instance))
+    r = requests.get('https://'+instance+'/api/v1/instance/peers', timeout=10)
+    if r.status_code == 200:
+        peers = r.json()
+        print(instance, 'peers with', len(peers))
+        return(peers)
+    else:
+        print('fail: got {} on {}'.format(r.status_code, instance))
 
 instances = {}
 
 r = requests.get(start_url+peers_info)
 
 if r. status_code == 200:
-	print('200 for '+start_url)
-	peers = r.json()
-	print('{} has {} peers'.format(start_url, len(peers)))
-	for count, peer in enumerate(peers):
-		#instances.add(peer)
-		try:
-			peer_list = get_peers(peer)
-			if peer_list:
-				for i in peer_list:
-					if i not in instances:
-						try:
-							ii = requests.get('https://'+i+instance_info, timeout=10)
-							info = ii.json()
-						except Exception as e:
-							print('failed to query instance info')
-							print(e)
-							info = 'error'
-							pass
-						instances[i] = info
-						print('added {}, n={}'.format(i,len(instances)))
-			else:
-				print(i.status_code, 'on', peer)
-		except Exception as e:
-			print('failure for', peer)
-			# instances[peer] = {'error':e}
-			print(e)
+    print('200 for '+start_url)
+    peers = r.json()
+    print('{} has {} peers'.format(start_url, len(peers)))
+    for count, peer in enumerate(peers):
+        #instances.add(peer)
+        try:
+            peer_list = get_peers(peer)
+            if peer_list:
+                for i in peer_list:
+                    if i not in instances:
+                        try:
+                            ii = requests.get('https://'+i+instance_info, timeout=10)
+                            info = ii.json()
+                        except Exception as e:
+                            print('failed to query instance info')
+                            print(e)
+                            info = 'error'
+                            pass
+                        instances[i] = info
+                        print('added {}, n={}'.format(i,len(instances)))
+            else:
+                print(i.status_code, 'on', peer)
+        except Exception as e:
+            print('failure for', peer)
+            # instances[peer] = {'error':e}
+            print(e)
 
 #text = list(filter(None.__ne__, instances))
 with open('instance_scrape.json','w') as f:
-	json.dumps(instances,f)
-	#f.write('\n'.join(text)
\ No newline at end of file
+    f.write(json.dumps(instances,indent=4))
+    #f.write('\n'.join(text)
\ No newline at end of file
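
A minimal sketch of the save step the patch arrives at, assuming `instances` is a plain dict mapping instance hostnames to the JSON returned by each instance's info endpoint (the sample data below is hypothetical). json.dumps() only returns a string, so its result has to be written to the file handle explicitly; json.dump() is the variant that writes to a file object directly:

    import json

    # hypothetical sample of what the crawler collects
    instances = {'example.social': {'title': 'Example', 'version': '2.4.0'}}

    # what the patched line does: serialise to a string, then write it out
    with open('instance_scrape.json', 'w') as f:
        f.write(json.dumps(instances, indent=4))

    # equivalent alternative: let the json module write to the file handle itself
    # with open('instance_scrape.json', 'w') as f:
    #     json.dump(instances, f, indent=4)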