#!/bin/env python3
# fediscraper v1
|
|
|
|
|
|
|
|
import json, requests |
|
|
import json, requests,threading |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base URL of the instance the crawl starts from (scheme included,
# no trailing slash, because the API paths below begin with '/').
start_url = 'https://post.lurk.org'

# Placeholder; not read anywhere in the visible part of this script.
activity = ''

# API paths that get appended to an instance's base URL.
peers_info = '/api/v1/instance/peers'
instance_info = '/api/v1/instance'
|
|
def get_peers(instance):
    """Fetch the peer list that *instance* reports about itself.

    Args:
        instance: Host name of the instance without a scheme;
            ``https://`` is prepended here.

    Returns:
        The decoded JSON body of the peers endpoint when the server
        answers with status 200, otherwise ``None`` (the status code is
        printed first).

    Raises:
        requests.RequestException: if the request fails or times out.
        ValueError: if a 200 response body is not valid JSON.
    """
    # Use the shared module-level path so this request and the seed
    # request cannot drift apart.
    r = requests.get('https://' + instance + peers_info, timeout=10)
    if r.status_code != 200:
        print('fail: got {} on {}'.format(r.status_code, instance))
        return None
    peers = r.json()
    print(instance, 'peers with', len(peers))
    return peers
|
|
|
|
|
|
|
|
|
|
|
# Maps an instance host name to the decoded JSON of its instance-info
# endpoint, or to the string 'error' when that lookup failed.
instances = {}

# Seed the crawl with the peer list of the starting instance. A timeout
# is set so an unresponsive seed cannot hang the script forever.
r = requests.get(start_url + peers_info, timeout=10)
if r.status_code == 200:
    print('200 for ' + start_url)
    peers = r.json()
    print('{} has {} peers'.format(start_url, len(peers)))
    for peer in peers:
        # Direct peers are not recorded on their own; a host is only
        # added when it appears in some peer's own peer list.
        try:
            peer_list = get_peers(peer)
            if peer_list:
                for i in peer_list:
                    if i in instances:
                        continue
                    try:
                        ii = requests.get(
                            'https://' + i + instance_info, timeout=10)
                        info = ii.json()
                    except Exception as e:
                        # Best effort: keep the host in the result, but
                        # mark that its details could not be fetched.
                        print('failed to query instance info')
                        print(e)
                        info = 'error'
                    instances[i] = info
                    print('added {}, n={}'.format(i, len(instances)))
            else:
                # get_peers returned None (non-200) or an empty list.
                print('no peers from', peer)
        except Exception as e:
            # One unreachable or misbehaving peer must not stop the
            # whole crawl.
            print(e)
else:
    print('fail: got {} on {}'.format(r.status_code, start_url))
|
|
|
|
|
|
|
|
# Persist everything that was collected as a single JSON document.
with open('instance_scrape.json', 'w') as f:
    # json.dump serializes straight into the file object. json.dumps only
    # returns a string and accepts no file argument, so nothing would be
    # written.
    json.dump(instances, f)