DAAP_interface/daapinterface.py

626 lines
20 KiB
Python

# encoding=utf8
# # # # # # # # # # # # # # # # # # # # # # # #
# REQUIREMENTS
# # # # # # # # # # # # # # # # # # # # # # # #
from flask import send_file, Flask, Response, url_for, render_template, Markup, jsonify, redirect, request, flash, session, make_response
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import json
# import pandas as pd
# ##### IMPORTS FOR TEST WIKIPAGE
from lxml import html
from bs4 import BeautifulSoup
import re
# # # # # # # # # # # # # # # # # # # # # # # #
# GETTING STARTED
# # # # # # # # # # # # # # # # # # # # # # # #
# Flask application object: static assets come from ./static (mounted at the
# URL root because static_url_path is empty) and templates from ./templates.
app = Flask(
    __name__,
    static_url_path='',
    static_folder="static",
    template_folder="templates",
)
# Jinja loop-controls extension: allows {% break %} / {% continue %} in templates.
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
# # # # # # # # # # # # # # # # # # # # # # # #
# GETTING WIKIBASE DATA
# # # # # # # # # # # # # # # # # # # # # # # #
# The three SPARQL clients all point at the same query-service endpoint; they
# are separate objects so a view can hold several prepared queries at once.
_WDQS_ENDPOINT = "https://query.daap.bannerrepeater.org/proxy/wdqs/bigdata/namespace/wdq/sparql"
sparql, sparql2, sparql3 = (SPARQLWrapper(_WDQS_ENDPOINT) for _ in range(3))
# # # # # # # # # # # # # # # # # # # # # # # #
# PAGES
# # # # # # # # # # # # # # # # # # # # # # # #
# Works (P1 = Q1, excluding Q57) that have a date-added (P87), a date (P13) and
# an image statement (P30) qualified with P54 = Q90; first 12 by ?dateadded.
_HOME_QUERY = '''
SELECT ?work ?workLabel ?image ?date ?dateadded WHERE {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
?work wdt:P1 wd:Q1;
wdt:P87 ?dateadded.
?work p:P30 ?statement.
?statement ps:P30 ?image;
pq:P54 wd:Q90.
?work wdt:P13 ?date.
FILTER(?work != wd:Q57)
}
ORDER BY (?dateadded)
LIMIT 12
'''


@app.route("/")
def home():
    """Render the landing page.

    Runs the home SPARQL query and hands two values to ``home.html``:

    * ``results`` - the decoded JSON response of the query, unmodified;
    * ``ImagesBanner`` - the image URLs found in the result rows (at most
      the last 12 of them).

    Returns:
        The rendered ``home.html`` template.
    """
    # NOTE(review): ORDER BY (?dateadded) is ascending, so LIMIT 12 keeps the
    # earliest-added works - confirm whether the newest were intended.
    sparql.setQuery(_HOME_QUERY)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # Only the image URL of each row is needed for the banner; the title, URI
    # and date are read by the template straight from ``results``.
    images_banner = [
        row["image"]["value"]
        for row in results["results"]["bindings"]
        if "image" in row
    ][-12:]
    return render_template('home.html', results=results, ImagesBanner=images_banner)
# Every work (P1 = Q1) except Q57, ordered by label; the image (P30 statement
# qualified with P54 = Q90) and the date (P13) are optional.
_BROWSE_ARCHIVE_QUERY = '''
SELECT ?work ?workLabel ?image ?date WHERE {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
?work wdt:P1 wd:Q1.
OPTIONAL { ?work p:P30 ?statement.
?statement ps:P30 ?image;
pq:P54 wd:Q90.}
OPTIONAL { ?work wdt:P13 ?date. }
FILTER(?work != wd:Q57)
}
ORDER BY (?workLabel)
'''


@app.route("/browsethearchive")
def browsethearchive():
    """Render the full archive listing.

    Runs the archive SPARQL query and passes its decoded JSON response to
    ``browsethearchive.html`` as ``results``.

    Returns:
        The rendered ``browsethearchive.html`` template.
    """
    sparql.setQuery(_BROWSE_ARCHIVE_QUERY)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # The template iterates over ``results`` itself, so no per-row processing
    # is needed here.
    return render_template('browsethearchive.html', results=results)
@app.route("/browsebycategory")
def browsebycategory():
    """Render the "browse by category" page; it needs no data from the wikibase."""
    page = render_template('browsebycategory.html')
    return page
##########################
# CATEGORIES TO BE BROWSED
#########################
######################### ARTIST INDEX
# Distinct P9 values of works (P1 = Q1), excluding Q82, most-referenced first
# and then by label.
_ARTISTS_INDEX_QUERY = '''
SELECT ?creators ?creatorsLabel ?creatorsAltLabel ?creatorsDescription
WHERE {
{
SELECT ?creators (COUNT(DISTINCT ?a) AS ?count) WHERE {
?a ?prop ?creators .
?a wdt:P1 ?work .
BIND (wdt:P9 AS ?prop) .
BIND (wd:Q1 AS ?work) .
} GROUP BY ?creators
} .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
FILTER (?creators !=wd:Q82)
}
ORDER BY DESC(?count) ?creatorsLabel
'''


@app.route("/artistsindex")
def artistsindex():
    """Render the index of creators.

    Runs the creators SPARQL query and passes its decoded JSON response to
    ``artistsindex.html`` as ``results``.

    Returns:
        The rendered ``artistsindex.html`` template.
    """
    sparql.setQuery(_ARTISTS_INDEX_QUERY)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # The leftover debug dump of ``results`` to stdout was dropped; the other
    # index views already have theirs commented out.
    return render_template('artistsindex.html', results=results)
######################### PUBLISHERS INDEX
# Distinct P10 values of works (P1 = Q1), excluding Q83 and Q71,
# most-referenced first and then by label.
_PUBLISHERS_INDEX_QUERY = '''
SELECT ?publishers ?publishersLabel ?publishersAltLabel ?publishersDescription WHERE {
{
SELECT ?publishers (COUNT(DISTINCT ?a) AS ?count) WHERE {
?a ?prop ?publishers .
?a wdt:P1 ?work .
BIND (wdt:P10 AS ?prop) .
BIND (wd:Q1 AS ?work) .
} GROUP BY ?publishers
} .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
FILTER(?publishers != wd:Q83)
FILTER(?publishers != wd:Q71)
}
ORDER BY DESC(?count) ?publishersLabel
'''


@app.route("/publishersindex")
def publishersindex():
    """Render the index of publishers from the publishers SPARQL query.

    The decoded JSON response is passed to ``publishersindex.html`` as
    ``results``.
    """
    sparql.setQuery(_PUBLISHERS_INDEX_QUERY)
    sparql.setReturnFormat(JSON)
    publishers = sparql.query().convert()
    return render_template('publishersindex.html', results=publishers)
######################### SELF PUBLISHED INDEX
# Works (P1 = Q1, excluding Q57) whose P10 value is Q71 - presumably the
# "self-published" placeholder publisher; image and date are optional.
_SELF_PUBLISHED_QUERY = '''
SELECT ?work ?workLabel ?image ?date
WHERE {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
?work wdt:P1 wd:Q1;
wdt:P10 wd:Q71.
OPTIONAL {?work p:P30 ?statement.
?statement ps:P30 ?image;
pq:P54 wd:Q90.}
OPTIONAL { ?work wdt:P13 ?date. }
FILTER(?work != wd:Q57)
}
'''


@app.route("/selfpublishedindex")
def selfpublishedindex():
    """Render the listing of works returned by the self-published query.

    The decoded JSON response is passed to ``selfpublishedindex.html`` as
    ``results``.
    """
    sparql.setQuery(_SELF_PUBLISHED_QUERY)
    sparql.setReturnFormat(JSON)
    works = sparql.query().convert()
    return render_template('selfpublishedindex.html', results=works)
######################### ZINES INDEX
# NOTE(review): this query is a verbatim copy of the one used by
# selfpublishedindex (it filters on wdt:P10 wd:Q71). Presumably a
# zine-specific criterion still has to be written - confirm the intended
# property/item before relying on this page.
_ZINES_INDEX_QUERY = '''
SELECT ?work ?workLabel ?image ?date
WHERE {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
?work wdt:P1 wd:Q1;
wdt:P10 wd:Q71.
OPTIONAL {?work p:P30 ?statement.
?statement ps:P30 ?image;
pq:P54 wd:Q90.}
OPTIONAL { ?work wdt:P13 ?date. }
FILTER(?work != wd:Q57)
}
'''


@app.route("/zinesindex")
def zinesindex():
    """Render the zines listing.

    Runs the zines SPARQL query and passes its decoded JSON response to
    ``zinesindex.html`` as ``results``.

    Returns:
        The rendered ``zinesindex.html`` template.
    """
    sparql.setQuery(_ZINES_INDEX_QUERY)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # The leftover debug dump of ``results`` to stdout was dropped.
    return render_template('zinesindex.html', results=results)
##########################
# DETAILED INDIVIDUAL PAGES
#########################
######################### ARTWORK
# Token replaced by the validated item id in every artwork query template.
_ARTWORK_ID_PLACEHOLDER = "__ARTWORK_ID__"

# Intro / top of the page: label, description and item type (P1).
_ARTWORK_INTRO_QUERY = '''
SELECT ?work ?workLabel ?workDescription ?itemtypeLabel
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P1 ?itemtype.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
'''

# Image(s) (P30) with their optional P54 / P56 qualifiers.
_ARTWORK_IMAGES_QUERY = '''
SELECT ?image ?depictsLabel ?licenseLabel
WHERE
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P30 ?image.
OPTIONAL { ?work p:P30 ?statement2.
?statement2 ps:P30 ?image;
pq:P54 ?depicts;
pq:P56 ?license.}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en".}
}
'''

# Contributors (P9) with their P49 qualifiers concatenated into ?role.
_ARTWORK_CONTRIBUTORS_QUERY = '''
SELECT DISTINCT ?creators ?creatorsLabel (group_concat(?creatorRolesLabel; separator="; ") as ?role)
WHERE
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P9 ?creators.
OPTIONAL { ?work p:P9 ?statement1.
?statement1 ps:P9 ?creators;
pq:P49 ?creatorRoles. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en".
?creators rdfs:label ?creatorsLabel.
?creatorRoles rdfs:label ?creatorRolesLabel. }
}
GROUP BY ?creators ?creatorsLabel
ORDER BY ?creatorsLabel
'''

# Date (P13) with its optional P50 qualifier.
_ARTWORK_DATE_QUERY = '''
SELECT ?date ?sourceLabel
WHERE
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P13 ?date.
OPTIONAL { ?work p:P13 ?statement1.
?statement1 ps:P13 ?date;
pq:P50 ?source. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en".}
}
'''

# Publishers (P10) with their P49 qualifiers concatenated into ?role.
# The qualifier is bound to ?publishersRoles so that it matches the variable
# the label service reads; it used to be bound to ?creatorRoles, which left
# ?publishersRolesLabel (and therefore ?role) permanently empty.
_ARTWORK_PUBLISHERS_QUERY = '''
SELECT DISTINCT ?publishers ?publishersLabel (group_concat(?publishersRolesLabel; separator="; ") as ?role)
WHERE
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P10 ?publishers.
OPTIONAL { ?work p:P10 ?statement1.
?statement1 ps:P10 ?publishers;
pq:P49 ?publishersRoles. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en".
?publishers rdfs:label ?publishersLabel.
?publishersRoles rdfs:label ?publishersRolesLabel. }
}
GROUP BY ?publishers ?publishersLabel
ORDER BY ?publishersLabel
'''

# Metadata of the description page linked through P65.
_ARTWORK_DESCRIPTION_QUERY = '''
SELECT ?accessURLdescriptionPage ?authordescriptionPage ?authordescriptionPageLabel ?datedescriptionPage ?sourcedescriptionPage ?sourcedescriptionPageLabel
WHERE
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P65 ?descriptionPage.
OPTIONAL { ?descriptionPage wdt:P4 ?accessURLdescriptionPage. }
OPTIONAL { ?descriptionPage wdt:P9 ?authordescriptionPage. }
OPTIONAL { ?descriptionPage wdt:P13 ?datedescriptionPage. }
OPTIONAL { ?descriptionPage wdt:P50 ?sourcedescriptionPage. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
'''

# Metadata of the exhibition-history page linked through P66.
_ARTWORK_EXHIBITION_QUERY = '''
SELECT ?accessURLexhibitionHisPage ?authorexhibitionHisPageLabel ?dateexhibitionHisPage ?sourceexhibitionHisPage
WHERE
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P66 ?exhibitionHisPage.
OPTIONAL { ?exhibitionHisPage wdt:P4 ?accessURLexhibitionHisPage. }
OPTIONAL { ?exhibitionHisPage wdt:P9 ?authorexhibitionHisPage. }
OPTIONAL { ?exhibitionHisPage wdt:P13 ?dateexhibitionHisPage. }
OPTIONAL { ?exhibitionHisPage wdt:P50 ?sourceexhibitionHisPage. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
'''

# Copies (P43), the collection each belongs to (P47) and its optional image.
_ARTWORK_COPIES_QUERY = '''
SELECT ?copiesCollections ?copiesCollectionsLabel ?collection ?collectionLabel ?image
WHERE
{
VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P43 ?copiesCollections.
?copiesCollections wdt:P47 ?collection.
OPTIONAL { ?collection wdt:P30 ?image. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} '''

# Related works (P44) with one sampled date and their optional images.
_ARTWORK_RELATED_QUERY = '''
SELECT ?relatedWorks ?relatedWorksLabel ?image ?daterelatedWorks
WHERE {
{
SELECT ?relatedWorks ?relatedWorksLabel (SAMPLE(?daterelatedWorks) AS ?daterelatedWorks)
WHERE
{ VALUES ?work {wd:__ARTWORK_ID__}
?work wdt:P44 ?relatedWorks.
OPTIONAL {?relatedWorks wdt:P13 ?daterelatedWorks.}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?relatedWorks ?relatedWorksLabel
ORDER BY ?relatedWorksLabel
}
OPTIONAL {?relatedWorks wdt:P30 ?image.}
} '''


def _artwork_select(query_template, artwork_id):
    """Run one artwork query for ``artwork_id`` and return the decoded JSON."""
    sparql.setQuery(query_template.replace(_ARTWORK_ID_PLACEHOLDER, artwork_id))
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def _artwork_wiki_text(namespace, artwork_id):
    """Fetch the rendered wiki page ``<namespace>:<artwork_id>`` as ``Markup``.

    Returns the ``mw-parser-output`` div of the page, or a short "not
    available" paragraph when the response does not contain that div.
    """
    url = ("https://daap.bannerrepeater.org/w/index.php?title="
           + namespace + ":" + artwork_id + "&action=render")
    soup = BeautifulSoup(requests.get(url).text, "lxml")
    content = soup.find("div", {"class": "mw-parser-output"})
    if content is None:
        # Markup(None) would render the literal text "None" on the page.
        return Markup("<p>Information not available</p>")
    return Markup(content)


@app.route("/artwork", methods=['GET'])
def artwork():
    """Render the detail page of a single work.

    Query-string parameters:
        id: wikibase item id of the work, e.g. ``Q123``.

    The view runs one SPARQL query per page section (intro, images,
    contributors, date, publishers, description metadata, exhibition
    metadata, copies in collections, related works) and fetches the
    ``Description:<id>`` and ``History:<id>`` wiki pages.

    Returns:
        The rendered ``artwork.html`` template, or a 400 response when ``id``
        is missing or is not of the form ``Q<digits>``.
    """
    artwork_id = request.args.get('id')
    # The id is spliced into SPARQL text and into wiki URLs, so only a plain
    # item id is accepted. This also covers the missing-parameter case, which
    # previously failed with a TypeError while building the first query.
    if artwork_id is None or not re.fullmatch(r"Q[0-9]+", artwork_id):
        return make_response("Invalid or missing artwork id", 400)

    # Top of the page and images
    artworkintro = _artwork_select(_ARTWORK_INTRO_QUERY, artwork_id)
    artworkimages = _artwork_select(_ARTWORK_IMAGES_QUERY, artwork_id)

    # Right top: contributors, date, publishers
    artworkcontributors = _artwork_select(_ARTWORK_CONTRIBUTORS_QUERY, artwork_id)
    artworkdate = _artwork_select(_ARTWORK_DATE_QUERY, artwork_id)
    artworkpublisher = _artwork_select(_ARTWORK_PUBLISHERS_QUERY, artwork_id)

    # Right middle: description metadata plus the description text of THIS
    # work (the page used to be hard-coded to Description:Q427).
    artworkdescriptiondata = _artwork_select(_ARTWORK_DESCRIPTION_QUERY, artwork_id)
    artworkdescriptiontext = _artwork_wiki_text("Description", artwork_id)

    # Right bottom: exhibition history of THIS work (the query used to be
    # hard-coded to wd:Q57).
    artworkexhibitiondata = _artwork_select(_ARTWORK_EXHIBITION_QUERY, artwork_id)
    artworkexhibitiontext = _artwork_wiki_text("History", artwork_id)

    # Bottom: copies in collections and related works
    copiesincollection = _artwork_select(_ARTWORK_COPIES_QUERY, artwork_id)
    relatedworks = _artwork_select(_ARTWORK_RELATED_QUERY, artwork_id)

    return render_template(
        'artwork.html',
        artwork_id=artwork_id,
        artworkintro=artworkintro,
        artworkimages=artworkimages,
        artworkcontributors=artworkcontributors,
        artworkdate=artworkdate,
        artworkpublisher=artworkpublisher,
        artworkdescriptiondata=artworkdescriptiondata,
        artworkdescriptiontext=artworkdescriptiontext,
        # Exhibition data was computed but never handed to the template;
        # extra context variables are harmless if the template ignores them.
        artworkexhibitiondata=artworkexhibitiondata,
        artworkexhibitiontext=artworkexhibitiontext,
        copiesincollection=copiesincollection,
        relatedworks=relatedworks,
    )
######################### PERSON - FOR NOW THIS IS A MESS
# Every direct claim (property label + value) of the requested item.
_PERSON_DETAILS_QUERY = '''
SELECT ?item ?itemLabel ?itemDescription ?propLabel ?b ?bLabel
WHERE
{
VALUES ?item {wd:__PERSON_ID__}
?item ?a ?b.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
?prop wikibase:directClaim ?a .
}
'''

# Works (P1 = Q1, excluding Q57) whose P9 value is the requested item.
_PERSON_CREATOR_OF_QUERY = '''
SELECT ?work ?workLabel ?image ?date
WHERE {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
?work wdt:P1 wd:Q1.
?work wdt:P9 wd:__PERSON_ID__.
OPTIONAL { ?work wdt:P30 ?image. }
OPTIONAL { ?work wdt:P13 ?date. }
FILTER(?work != wd:Q57)
}
ORDER BY (?workLabel)
'''

# Works (P1 = Q1, excluding Q57) whose P10 value is the requested item.
_PERSON_PUBLISHER_OF_QUERY = '''
SELECT ?work ?workLabel ?image ?date
WHERE {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
?work wdt:P1 wd:Q1.
?work wdt:P10 wd:__PERSON_ID__.
OPTIONAL { ?work wdt:P30 ?image. }
OPTIONAL { ?work wdt:P13 ?date. }
FILTER(?work != wd:Q57)
}
ORDER BY (?workLabel)
'''


@app.route("/person", methods=['GET'])
def person():
    """Render the detail page of a person.

    Query-string parameters:
        id: wikibase item id of the person, e.g. ``Q123``.

    Three queries are run: all direct claims of the item, the works that
    reference it through P9, and the works that reference it through P10.

    Returns:
        The rendered ``person.html`` template, or a 400 response when ``id``
        is missing or is not of the form ``Q<digits>``.
    """
    person_id = request.args.get('id')
    # The id is spliced into SPARQL text, so only a plain item id is accepted.
    # This also covers the missing-parameter case, which previously failed
    # with a TypeError while building the first query.
    if person_id is None or not re.fullmatch(r"Q[0-9]+", person_id):
        return make_response("Invalid or missing person id", 400)

    def run(client, query_template):
        # Bind the id, execute on the given client and decode the JSON reply.
        client.setQuery(query_template.replace("__PERSON_ID__", person_id))
        client.setReturnFormat(JSON)
        return client.query().convert()

    person_details = run(sparql, _PERSON_DETAILS_QUERY)
    person_creatorof = run(sparql2, _PERSON_CREATOR_OF_QUERY)
    # Executed on sparql3, the client that holds the publisher query. It was
    # previously executed on sparql2, which returned the creator results a
    # second time.
    person_publisherof = run(sparql3, _PERSON_PUBLISHER_OF_QUERY)

    return render_template(
        "person.html",
        person_id=person_id,
        person_creatorof=person_creatorof,
        person_publisherof=person_publisherof,
        person_details=person_details,
    )
######################### ORGANISATION
@app.route("/organisation", methods=['GET'])
def organisation():
    """Render the organisation page.

    Query-string parameters:
        id: identifier of the organisation (may be absent, in which case
            ``org_id`` is ``None``).

    Returns:
        The rendered ``organisation.html`` template.
    """
    org_id = request.args.get('id')
    # The id used to be read and then discarded; it is now exposed to the
    # template in the same way person() exposes person_id. No wikibase query
    # is run for organisations yet.
    return render_template("organisation.html", org_id=org_id)
#########################
# PAGES FROM WIKI
#########################
######################### SEARCH TOOLS
@app.route("/searchtools")
def searchtools():
    """Render the search-tools page; it needs no data from the wiki."""
    page = render_template('searchtools.html')
    return page
######################### ABOUT
@app.route("/about")
def about():
    """Render the About page from the wiki's rendered "About" article.

    The article HTML is fetched, parsed with BeautifulSoup/lxml, and its
    ``<html>`` element is passed to ``about.html`` as ``text``.
    """
    wiki_url = "https://daap.bannerrepeater.org/w/index.php?title=About&action=render"
    response = requests.get(wiki_url)
    parsed = BeautifulSoup(response.text, "lxml")
    # TODO: adapt image paths ("wiki/File:" -> "wiki/Special:Redirect/file/"),
    # i.e. replace each img src with the href of its wrapping link and remove
    # the link.
    return render_template('about.html', text=Markup(parsed.find("html")))
######################### TUTORIAL
@app.route("/tutorials")
def tutorials():
    """Render the Tutorials page from the wiki's rendered "Tutorials" article.

    The article HTML is fetched, parsed with BeautifulSoup/lxml, and its
    ``<html>`` element is passed to ``tutorials.html`` as ``text``.

    Returns:
        The rendered ``tutorials.html`` template.
    """
    url = "https://daap.bannerrepeater.org/w/index.php?title=Tutorials&action=render"
    # Make a GET request to fetch the raw HTML content
    html_content = requests.get(url).text
    # Parse the html content
    soup = BeautifulSoup(html_content, "lxml")
    text = Markup(soup.find("html"))
    # The fetched markup was previously built and then dropped; hand it to the
    # template like the other wiki-backed pages in this file do.
    return render_template('tutorials.html', text=text)
######################### UPLOAD
@app.route("/upload")
def upload():
    """Render the Upload page from the wiki's rendered "Upload" article.

    The ``<html>`` element of the parsed article is passed to ``upload.html``
    as ``text``.
    """
    wiki_url = "https://daap.bannerrepeater.org/w/index.php?title=Upload&action=render"
    response = requests.get(wiki_url)
    parsed = BeautifulSoup(response.text, "lxml")
    return render_template('upload.html', text=Markup(parsed.find("html")))
# #################### CASE STUDY
@app.route("/casestudy")
def casestudy():
    """Render the case-study page from its rendered wiki article.

    The ``<html>`` element of the parsed article is passed to
    ``casestudy.html`` as ``text``.
    """
    wiki_url = "https://daap.bannerrepeater.org/w/index.php?title=Carolee_Schneemann_case_study&action=render"
    response = requests.get(wiki_url)
    parsed = BeautifulSoup(response.text, "lxml")
    return render_template('casestudy.html', text=Markup(parsed.find("html")))
# #################### CODE OF CONDUCT
@app.route("/codeofconduct")
def codeofconduct():
    """Render the Code of Conduct page from its rendered wiki article.

    The ``<html>`` element of the parsed article is passed to
    ``codeofconduct.html`` as ``text``.
    """
    wiki_url = "https://daap.bannerrepeater.org/w/index.php?title=Code_of_Conduct&action=render"
    response = requests.get(wiki_url)
    parsed = BeautifulSoup(response.text, "lxml")
    return render_template('codeofconduct.html', text=Markup(parsed.find("html")))
######################### LOGIN
#Goes to wikibase page
# ###################
# TEST
# Experiment with the MediaWiki "parse" API: fetch the wiki page "Test" as
# JSON and extract the text of its first paragraph into ``intro_text``.
# NOTE(review): this appears to sit at module level (it follows the last
# route), so the HTTP request would run every time the module is imported -
# confirm, and consider moving it into a function or behind a
# ``if __name__ == "__main__":`` guard.
response = requests.get(
'https://daap.bannerrepeater.org/w/api.php',
params={
'action': 'parse',
'page': 'Test',
'format': 'json',
}).json()
# Rendered HTML of the page, as returned under parse -> text -> "*".
raw_html = response['parse']['text']['*']
document = html.document_fromstring(raw_html)
# NOTE(review): raises IndexError when the page contains no <p> element.
first_p = document.xpath('//p')[0]
intro_text = first_p.text_content()
# print(intro_text)
# ALL NAME
# https://daap.bannerrepeater.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces|namespacealiases
# replace or insert tags
# https://stackoverflow.com/questions/2073541/search-and-replace-in-html-with-beautifulsoup