Browse Source

first version, crawls only the announced peers

master
rra 1 year ago
commit
abbb8a6dd7
2 changed files with 6338 additions and 0 deletions
  1. 38
    0
      fedicrawler.py
  2. 6300
    0
      instances.txt

+ 38
- 0
fedicrawler.py View File

@@ -0,0 +1,38 @@
1
#!/bin/env python3
# fediscraper v1
#
# Crawls the fediverse one hop out from a seed instance: fetches the seed's
# announced peer list, then each peer's own peer list, accumulating every
# instance domain seen, and writes the de-duplicated result to
# instance_scrape.txt (one domain per line).

import json, requests


start_url = 'https://post.lurk.org'
activity = ''                              # NOTE: unused in v1, kept for later versions
peers_info = '/api/v1/instance/peers'      # Mastodon peers endpoint
instance_info = '/api/v1/instance'         # NOTE: unused in v1, kept for later versions

# Set of instance domains discovered so far.
# Fix: was `{set([])}`, a set literal *containing* a set, which raises
# "TypeError: unhashable type: 'set'" on the first .add(); an empty set
# is what was intended.
instances = set()

r = requests.get(start_url + peers_info)
if r.status_code == 200:
    print('200 for '+start_url)
    peers = r.json()
    print('{} has {} peers'.format(start_url, len(peers)))
    for count, peer in enumerate(peers):
        instances.add(peer)
        try:
            r = requests.get('https://'+peer+peers_info, timeout=10)
            if r.status_code == 200:
                print(peer, 'peers with', len(r.json()))
                for i in r.json():
                    if i not in instances:
                        instances.add(i)
                        print('added {}, n={}'.format(i,len(instances)))
            else:
                # Fix: was `i.status_code` — `i` is unbound here on the first
                # non-200 peer (NameError) and is a domain string otherwise;
                # the response object is `r`.
                print(r.status_code, 'on', peer)
        except Exception as e:
            # Best-effort crawl: log the failure and move on to the next peer.
            print('failure for', peer)
            # instances[peer] = {'error':e}
            print(e)

# Peer lists can contain explicit nulls; drop them before writing.
text = list(filter(None.__ne__, instances))

with open('instance_scrape.txt', 'w') as f:
    f.write('\n'.join(text))

+ 6300
- 0
instances.txt
File diff suppressed because it is too large
View File


Loading…
Cancel
Save