crawler now looks for instance metadata, started to abstract collection into functions
This commit is contained in:
parent
d90366d4c6
commit
4cac59e445
@ -1,30 +1,45 @@
|
||||
#!/bin/env python3
|
||||
# fediscraper v1
|
||||
|
||||
import json, requests
|
||||
|
||||
import json, requests,threading
|
||||
|
||||
start_url = 'https://post.lurk.org'
|
||||
activity = ''
|
||||
peers_info = '/api/v1/instance/peers'
|
||||
instance_info = '/api/v1/instance'
|
||||
|
||||
instances = {set([])}
|
||||
def get_peers(instance):
    """Fetch the list of peers that *instance* reports.

    Queries ``https://<instance>/api/v1/instance/peers`` with a 10 second
    timeout.

    Returns the decoded JSON body when the server answers 200; otherwise
    prints a failure line and returns ``None``. Exceptions raised by the
    request or by JSON decoding are not caught here and propagate to the
    caller.
    """
    response = requests.get(
        'https://'+instance+'/api/v1/instance/peers',
        timeout=10,
    )

    # Guard clause: anything but a 200 is reported and yields no peer list.
    if response.status_code != 200:
        print('fail: got {} on {}'.format(response.status_code, instance))
        return None

    peer_names = response.json()
    print(instance, 'peers with', len(peer_names))
    return peer_names
|
||||
|
||||
instances = {}
|
||||
r = requests.get(start_url+peers_info)
|
||||
if r. status_code == 200:
|
||||
print('200 for '+start_url)
|
||||
peers = r.json()
|
||||
print('{} has {} peers'.format(start_url, len(peers)))
|
||||
for count, peer in enumerate(peers):
|
||||
instances.add(peer)
|
||||
#instances.add(peer)
|
||||
try:
|
||||
r = requests.get('https://'+peer+peers_info, timeout=10)
|
||||
if r.status_code == 200:
|
||||
print(peer, 'peers with', len(r.json()))
|
||||
for i in r.json():
|
||||
peer_list = get_peers(peer)
|
||||
if peer_list:
|
||||
for i in peer_list:
|
||||
if i not in instances:
|
||||
instances.add(i)
|
||||
print('added {}, n={}'.format(i,len(instances)))
|
||||
try:
|
||||
ii = requests.get('https://'+i+instance_info, timeout=10)
|
||||
info = ii.json()
|
||||
except Exception as e:
|
||||
print('failed to query instance info')
|
||||
print(e)
|
||||
info = 'error'
|
||||
pass
|
||||
instances[i] = info
|
||||
print('added {}, n={}'.format(i,len(instances)))
|
||||
else:
|
||||
print(i.status_code, 'on', peer)
|
||||
except Exception as e:
|
||||
@ -32,7 +47,8 @@ if r. status_code == 200:
|
||||
# instances[peer] = {'error':e}
|
||||
print(e)
|
||||
|
||||
text = list(filter(None.__ne__, instances))
|
||||
#text = list(filter(None.__ne__, instances))
|
||||
|
||||
with open('instance_scrape.txt','w') as f:
|
||||
f.write('\n'.join(text))
|
||||
# Persist the collected instance metadata as JSON.
with open('instance_scrape.json','w') as f:
    # json.dump serialises straight into the file object. json.dumps only
    # returns a string and accepts no positional file argument, so calling
    # it with `f` raised TypeError and left the file empty.
    json.dump(instances, f)
|
||||
#f.write('\n'.join(text)
|
Loading…
Reference in New Issue
Block a user