diff --git a/fedicrawler.py b/fedicrawler.py
index 710cbc8..6aabf08 100644
--- a/fedicrawler.py
+++ b/fedicrawler.py
@@ -3,11 +3,16 @@
 import json, requests
 from multiprocessing.dummy import Pool as ThreadPool
+from urllib.parse import urlparse
 
 start_url = 'https://post.lurk.org'
 activity = ''
 peers_info = '/api/v1/instance/peers'
 
+proxies = {
+    'http': 'socks5://localhost:12345',
+    'https': 'socks5://localhost:12345'}
+
 pool = ThreadPool(512)
 
 def not_gab(instance):
@@ -15,75 +20,125 @@ def not_gab(instance):
     #example: epa1pu1qcxxyzcxher0u.gab.best
     if instance:
         if 'gab.best' in instance:
+            print('GAB', instance)
+            return False
+        elif 'ngrok.io' in instance:
+            print('NGROK', instance)
+            return False
+        elif 'glitch.me' in instance:
+            print('GLITCH', instance)
             return False
         else:
             return True
     else:
         return False
-    #TODO filter ngrok
 
+def only_netloc(instance):
+    #some peerlists return stuff like
+    #mastodon.social/users/blabla or
+    #domain.tld/friendica which are all invalid
+    return urlparse('https://'+instance).netloc == instance
+
+
+def multi_filter(fs, l):
+    # https://www.reddit.com/r/Python/comments/6xefvp/applying_multiple_filters_to_a_list/
+    if not fs:
+        return l
+    return multi_filter(fs[1:], (x for x in l if fs[0](x)))
+
 def get_peers(instance):
+    #this rests on the assumption that Mastodon & Pleroma instances combined
+    #have enough of a view of the entire fediverse to rely only on those
     try:
-        r = requests.get('https://'+instance+'/api/v1/instance/peers', timeout=1)
+        r = requests.get('https://'+instance+'/api/v1/instance/peers', timeout=3, proxies=proxies)
         if r.status_code == 200:
             peers = r.json()
             print(instance, 'peers with', len(peers))
             return peers
         else:
-            print('fail: got {} on {}'.format(r.status_code, instance))
+            # 404s etc.
+            #print('fail: got {} on {}'.format(r.status_code, instance))
             return
     except Exception as e:
-        print('fail on',instance, e)
-        # print(e)
+        #network errors etc.
+        #print('fail on', instance, e)
+        #print(e)
         return
 
-def get_instance_info(instance):
-    instance_info = '/api/v1/instance'
-    if not_gab(instance):
-        print('getting info for', instance)
-        try:
-            r = requests.get('https://'+instance+instance_info, timeout=10)
+def get_nodeinfo(instance):
+    nodeinfo_probe = '/.well-known/nodeinfo'
+    try:
+        r = requests.get('https://{}{}'.format(instance, nodeinfo_probe), timeout=3, proxies=proxies)
+        if r.status_code == 200:
+            nodeinfo_endpoint = r.json()['links'][0]['href']
+            print(nodeinfo_endpoint)
+            r = requests.get(nodeinfo_endpoint, timeout=3, proxies=proxies)
             if r.status_code == 200:
-                info = r.json()
-                print('info request for {} succeeded'.format(instance))
-            elif r.status_code == 400:
-                #try to see if its peertube, probably should use something better
-                pt = requests.get('https://'+instance+'/api/v1/config')
-                if r.status_code == 200:
-                    print('info request for {} succeeded, peertube'.format(instance))
-                    info = r.json()
-                else:
-                    info = {'error': r.status_code}
+                info = {'nodeinfo': r.json()}
+            else:
+                info = {'error': r.status_code}
+        else:
+            info = {'error': r.status_code}
+    except Exception as e:
+        info = {'error': str(e)}
+        #print(e)
+    return info
+
+
+def get_instance_info(instance):
+    ## no longer used but keeping around for later maybe
+    #instance_info = '/api/v1/instance'
+    instance_info = '/.well-known/nodeinfo'
+    try:
+        r = requests.get('https://'+instance+instance_info, timeout=10, proxies=proxies)
+        if r.status_code == 200:
+            info = r.json()
+            print('info request for {} succeeded'.format(instance))
+        elif r.status_code == 400:
+            #try to see if it's peertube, probably should use a better method
+            pt = requests.get('https://'+instance+'/api/v1/config')
+            if pt.status_code == 200:
+                print('info request for {} succeeded, peertube'.format(instance))
+                info = pt.json()
             else:
                 info = {'error': r.status_code}
-        except Exception as e:
-            print('failed to query instance info')
-            # print(e)
-            info = {'error': str(e)}
-        return info
+        else:
+            #if we get any other http code it probably needs fixing
+            info = {'error': r.status_code}
+    except requests.exceptions.ConnectionError as e:
+        info = {'error': 'Connection error: '+str(e)}
+    except Exception as e:
+        info = {'error': str(e)}
+    return info
 
+filters = [not_gab, only_netloc] #what to filter out
 
 instances = set([])
-r = requests.get(start_url+peers_info)
+r = requests.get(start_url+peers_info) # normal brain, initial peer list
 
 if r.status_code == 200:
-    start_peers = pool.map(get_peers, r.json())
+    start_peers = pool.map(get_peers, r.json()) #expanding brain, get all peers of those initial peers
     for i in start_peers:
         if not i:
             start_peers.remove(i)
         else:
             pool.map(instances.add, i)
+    # for i in r.json():
+    #     instances.add(i)
+    instances = set(multi_filter(filters,instances)) # apply filters before we move to network
 
-    network = pool.map(get_peers, instances)
+    network = pool.map(get_peers, instances) #galaxy brain, get all peers of all peers of the initial peers
 
     for peer_list in network:
         if peer_list:
             for instance in peer_list:
-                if not_gab(instance):
+                if not_gab(instance): #prevent gab.best subdomain enumeration
                     instances.add(instance)
+    instances = set(multi_filter(filters,instances))
 
-instance_info = pool.map(get_instance_info, instances)
+instance_info = pool.map(get_nodeinfo, instances)
 
 scrape = {}
 
@@ -97,8 +152,6 @@
 print('found {} instances'.format(len(scrape)))
 
 pool.close()
 pool.join()
-
-
 with open('instance_scrape.json','w') as f:
     f.write(json.dumps(scrape,indent=4))
     #f.write('\n'.join(text)
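Notes on the patch.

On the new proxies block: as originally written both dict keys were 'http', so the second entry silently overwrote the first and https:// requests bypassed the proxy entirely; the reconstruction above assumes one entry per scheme was intended. Two further caveats, sketched below: requests only speaks SOCKS if PySocks is installed (pip install requests[socks]), and the socks5h:// scheme variant resolves DNS through the proxy as well, which matters when crawling hosts your local resolver cannot see. Port 12345 stands in for whatever local SOCKS listener is running (for example an ssh -D 12345 dynamic forward).

    # requires: pip install requests[socks]
    import requests

    proxies = {
        'http':  'socks5h://localhost:12345',  # socks5h = DNS resolved by the proxy
        'https': 'socks5h://localhost:12345',  # plain socks5 resolves DNS locally
    }

    r = requests.get('https://post.lurk.org/api/v1/instance/peers',
                     timeout=3, proxies=proxies)
    print(r.status_code, len(r.json()))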
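On the filters: only_netloc in the submitted hunk returned urlparse('https://'+instance).netloc, which is a non-empty (hence truthy) string for nearly every entry, so used as a predicate in multi_filter it filtered out almost nothing. The reconstruction compares the parsed netloc against the raw entry instead, which drops anything carrying a path, matching the comment's stated intent. multi_filter itself composes the predicates by recursively nesting generator expressions, so nothing is evaluated until the result is consumed, hence the set(...) wrapped around each call. A minimal sketch with a made-up peer list:

    from urllib.parse import urlparse

    def not_gab(instance):
        return bool(instance) and 'gab.best' not in instance

    def only_netloc(instance):
        # true only for bare hostnames: a path component changes the string
        return urlparse('https://' + instance).netloc == instance

    def multi_filter(fs, l):
        if not fs:
            return l
        return multi_filter(fs[1:], (x for x in l if fs[0](x)))

    peers = ['post.lurk.org',
             'mastodon.social/users/blabla',  # has a path, dropped by only_netloc
             'xyz.gab.best']                  # gab subdomain, dropped by not_gab
    print(set(multi_filter([not_gab, only_netloc], peers)))
    # {'post.lurk.org'}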
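On get_nodeinfo: it trusts links[0] of the well-known discovery document, but servers can advertise several nodeinfo schema versions in any order. A hedged sketch of a more deliberate pick follows; pick_nodeinfo_href is a hypothetical helper, the URL shape follows the nodeinfo spec, and post.lurk.org is just the crawl's seed host:

    import requests

    def pick_nodeinfo_href(instance, timeout=3):
        # fetch the discovery document and return the href of the
        # newest schema version the host advertises
        r = requests.get('https://{}/.well-known/nodeinfo'.format(instance),
                         timeout=timeout)
        links = r.json().get('links', [])
        if not links:
            return None
        # rel looks like .../ns/schema/2.0 -> sort on the version suffix
        links.sort(key=lambda lnk: lnk.get('rel', '').rsplit('/', 1)[-1])
        return links[-1]['href']

    print(pick_nodeinfo_href('post.lurk.org'))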
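One pre-existing wrinkle the commit leaves untouched (visible in the context lines above): the loop over start_peers calls start_peers.remove(i) while iterating over the same list, which makes the iterator skip the element following each removal, so some None results from failed fetches can still reach the else branch. Since the removal only exists to skip falsy results, a sketch of the safer equivalent:

    start_peers = [p for p in start_peers if p]  # drop failed fetches (None) first
    for i in start_peers:
        pool.map(instances.add, i)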