xppl/app/getannot.py

68 lines
2.3 KiB
Python
Raw Normal View History

2018-06-12 19:55:56 +02:00
# #https://gist.github.com/mjlavin80/186a6395c5819dbe25a8a0e001d5acfd
2018-06-13 16:49:14 +02:00
from flask import request
2018-06-06 18:54:07 +02:00
import requests
import json
2018-06-12 19:55:56 +02:00
# This script demonstrates how to query annotations for a particular URL using the hypothes.is API. An API key is required.
# The end result of this script is a Python dictionary with annotation data in it. To save to CSV or another format, further parsing would be required.
2018-06-06 18:54:07 +02:00
def get_annotations():
    """Fetch the annotations of the ``xpub@hypothes.is`` user from hypothes.is.

    Sends an authenticated GET request to the hypothes.is search API and
    returns the decoded JSON body.

    Returns:
        The parsed JSON response (typically a dict). The other helpers in
        this module read a ``'rows'`` list from it.

    Raises:
        requests.exceptions.Timeout: If hypothes.is does not answer within
            the timeout below.
        requests.exceptions.RequestException: On other transport failures.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # NOTE(review): this API token is committed to source control; consider
    # loading it from configuration or the environment and rotating it.
    KEY = "6879-n8AksBoSB7kYoQ3eEwzpEr3nFQEmSp3XN-0PcKL_Sik"
    # HTTP headers sent with the search request.
    headers = {
        "Host": "hypothes.is",
        "Accept": "application/json",
        "Authorization": "Bearer %s" % KEY
    }
    search_url = "https://hypothes.is/api/search?user=xpub@hypothes.is"
    # Without an explicit timeout, requests waits indefinitely on an
    # unresponsive server, which would block the calling worker.
    timeout_seconds = 10
    r = requests.get(search_url, headers=headers, timeout=timeout_seconds)
    return json.loads(r.text)
2018-06-11 13:32:18 +02:00
2018-06-12 19:55:56 +02:00
def get_annot_results(annot, name):
    """Return the annotations whose comment or quoted text contains *name*.

    Args:
        annot: Search result in the shape returned by ``get_annotations()``:
            a mapping with a ``'rows'`` list. When ``None``, the annotations
            are fetched with ``get_annotations()``.
        name: Substring looked for in each annotation's ``'text'`` and in
            its quoted passage.

    Returns:
        A list of dicts with the keys ``'text'``, ``'extract'``, ``'title'``
        and ``'url'``, one per matching annotation, in input order.
    """
    # Use the data the caller already fetched; only hit the network when
    # nothing was supplied. (Previously the argument was always overwritten
    # by a fresh request.)
    if annot is None:
        annot = get_annotations()

    res = []
    for item in annot['rows']:
        target = item['target'][0]
        # Annotations without selectors carry no quoted passage; skip them.
        if 'selector' not in target:
            continue
        selectors = target['selector']
        # The quoted text is read from index 2 when there are more than two
        # selectors, otherwise from index 1.
        extract = selectors[2 if len(selectors) > 2 else 1]['exact']
        if name in item['text'] or name in extract:
            res.append({
                'text': item['text'],
                'extract': extract,
                'title': item['document']['title'],
                'url': item['uri'],
            })
    return res
2018-06-11 13:32:18 +02:00
2018-06-12 19:55:56 +02:00
def get_annot_book(annot, name):
    """Collect the annotations made on the uploaded file called *name*.

    Args:
        annot: Mapping with a ``'rows'`` list of annotation dicts.
        name: Expected remainder of an annotation's ``'uri'`` once the
            ``http://<request host>/uploads/`` prefix has been removed.

    Returns:
        A list of dicts with the keys ``'text'``, ``'extract'``, ``'title'``
        and ``'url'``, one per matching annotation, in input order.
    """
    matches = []
    # Only the plain-http form of the uploads URL on the current request's
    # host is stripped from the annotation URI.
    upload_prefix = 'http://' + request.host + '/uploads/'
    for row in annot['rows']:
        first_target = row['target'][0]
        # Rows without selectors are ignored.
        if 'selector' not in first_target:
            continue
        selectors = first_target['selector']
        # Quoted text comes from index 2 when more than two selectors are
        # present, otherwise from index 1.
        quote_index = 2 if len(selectors) > 2 else 1
        if name != row['uri'].replace(upload_prefix, ''):
            continue
        matches.append({
            'text': row['text'],
            'extract': selectors[quote_index]['exact'],
            'title': row['document']['title'],
            'url': row['uri'],
        })
    return matches
2018-06-11 13:32:18 +02:00
2018-06-06 18:54:07 +02:00