Browse Source

minor tweaks to fedicrawler & scrape 05-05-2020

master
rra 5 years ago
parent
commit
3f5d2bbad0
  1. 6
      fedicrawler.py
  2. 344561
      instance_scrape.json

6
fedicrawler.py

@ -1,5 +1,5 @@
#!/bin/env python3 #!/bin/env python3
# fediscraper v3 # fedicrawler v4
import json, requests import json, requests
from multiprocessing.dummy import Pool as ThreadPool from multiprocessing.dummy import Pool as ThreadPool
@ -22,12 +22,15 @@ def not_gab(instance):
if 'gab.best'in instance: if 'gab.best'in instance:
print('GAB', instance) print('GAB', instance)
return False return False
# the ones below are mostly used for testing apps
elif 'ngrok.io' in instance: elif 'ngrok.io' in instance:
print('NGROK', instance) print('NGROK', instance)
return False return False
elif 'glitch.me' in instance: elif 'glitch.me' in instance:
print('GLITCH', instance) print('GLITCH', instance)
return False return False
elif 'netlify.app' in instance:
print('NETLIFY', instance)
else: else:
return True return True
else: else:
@ -141,7 +144,6 @@ if r. status_code == 200:
instance_info = pool.map(get_nodeinfo, instances) instance_info = pool.map(get_nodeinfo, instances)
scrape = {} scrape = {}
instances_list = list(instances) instances_list = list(instances)
for count, value in enumerate(instances_list): for count, value in enumerate(instances_list):

344561
instance_scrape.json

File diff suppressed because it is too large
Loading…
Cancel
Save