first version, crawls only the announced peers
This commit is contained in:
commit
abbb8a6dd7
38
fedicrawler.py
Normal file
38
fedicrawler.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
#!/bin/env python3
|
||||||
|
# fediscraper v1
|
||||||
|
|
||||||
|
import json, requests
|
||||||
|
|
||||||
|
|
||||||
|
# Instance the crawl starts from, and the API paths appended to a base URL.
start_url = 'https://post.lurk.org'
activity = ''
peers_info = '/api/v1/instance/peers'
instance_info = '/api/v1/instance'

# Seconds to wait on any single HTTP request before giving up on that host.
request_timeout = 10

# Every instance name seen so far: the start instance's peers plus the
# peers that those peers announce.
instances = set()

r = requests.get(start_url + peers_info, timeout=request_timeout)
if r.status_code == 200:
    print('200 for ' + start_url)
    peers = r.json()
    print('{} has {} peers'.format(start_url, len(peers)))

    for peer in peers:
        instances.add(peer)
        try:
            r = requests.get('https://' + peer + peers_info,
                             timeout=request_timeout)
            if r.status_code == 200:
                # Decode the body once and reuse it for both the count
                # and the iteration.
                announced = r.json()
                print(peer, 'peers with', len(announced))
                for i in announced:
                    if i not in instances:
                        instances.add(i)
                        print('added {}, n={}'.format(i, len(instances)))
            else:
                print(r.status_code, 'on', peer)
        except Exception as e:
            # Best-effort crawl: an unreachable host, a malformed response
            # or a non-string peer entry must not abort the whole run.
            print('failure for', peer)
            # instances[peer] = {'error':e}
            print(e)

# Peer lists may contain null entries; drop them so join() only sees names.
text = [name for name in instances if name is not None]

with open('instance_scrape.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(text))
|
6300
instances.txt
Normal file
6300
instances.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user