Browse Source

first version, crawls only the announced peers

master
rra 2 years ago
commit
abbb8a6dd7
2 changed files with 6338 additions and 0 deletions
  1. +38
    -0
      fedicrawler.py
  2. +6300
    -0
      instances.txt

+ 38
- 0
fedicrawler.py View File

@@ -0,0 +1,38 @@
#!/bin/env python3
# fediscraper v1

import json, requests


# Instance whose announced peer list seeds the crawl.
start_url = 'https://post.lurk.org'
# Not used by the code below; kept so the module's names stay unchanged.
activity = ''
# Path of the endpoint that is requested for the list of peers.
peers_info = '/api/v1/instance/peers'
# Not used by the code below; kept so the module's names stay unchanged.
instance_info = '/api/v1/instance'

# Every peer name collected so far; a set gives O(1) duplicate checks.
instances = set()

# Same timeout as the per-peer requests so a stalled seed host cannot
# hang the script forever.
r = requests.get(start_url + peers_info, timeout=10)
if r.status_code == 200:
    print('200 for '+start_url)
    peers = r.json()
    print('{} has {} peers'.format(start_url, len(peers)))
    for peer in peers:
        instances.add(peer)
        # Best effort: one unreachable or misbehaving peer must not abort
        # the whole crawl, so any failure is reported and the loop goes on.
        try:
            r = requests.get('https://'+peer+peers_info, timeout=10)
            if r.status_code == 200:
                # Decode the response body once and reuse the result.
                peer_peers = r.json()
                print(peer, 'peers with', len(peer_peers))
                for i in peer_peers:
                    if i not in instances:
                        instances.add(i)
                        print('added {}, n={}'.format(i, len(instances)))
            else:
                # Report the HTTP status of the response that was rejected.
                print(r.status_code, 'on', peer)
        except Exception as e:
            print('failure for', peer)
            print(e)
else:
    print(r.status_code, 'on', start_url)

# Keep only string entries: '\n'.join() rejects anything else, and a peer
# list may contain null values. Sorting makes the output reproducible,
# since set iteration order is arbitrary.
text = sorted(i for i in instances if isinstance(i, str))

with open('instance_scrape.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(text))

+ 6300
- 0
instances.txt
File diff suppressed because it is too large
View File


Loading…
Cancel
Save