# encoding=utf8
# # # # # # # # # # # # # # # # # # # # # # # #
# REQUIREMENTS
# # # # # # # # # # # # # # # # # # # # # # # #
from flask import send_file , Flask , Response , url_for , render_template , Markup , jsonify , redirect , request , flash , session , make_response
import requests
from SPARQLWrapper import SPARQLWrapper , JSON
import json
# import pandas as pd
# ##### IMPORTS FOR TEST WIKIPAGE
from lxml import html
from bs4 import BeautifulSoup
import re
# # # # # # # # # # # # # # # # # # # # # # # #
# GETTING STARTED
# # # # # # # # # # # # # # # # # # # # # # # #
# The Flask application object used by every route decorator in this file.
app = Flask(
    __name__,
    static_url_path=' ',
    static_folder=" static ",
    template_folder=" templates ",
)
# Register the Jinja extension named below on the app's template environment.
app.jinja_env.add_extension(' jinja2.ext.loopcontrols ')
# # # # # # # # # # # # # # # # # # # # # # # #
# GETTING WIKIBASE DATA
# # # # # # # # # # # # # # # # # # # # # # # #
# Three separate SPARQLWrapper clients, all created with the same endpoint
# URL. The view functions set a query on one of them and then execute it.
sparql, sparql2, sparql3 = (
    SPARQLWrapper(" https://query.daap.bannerrepeater.org/proxy/wdqs/bigdata/namespace/wdq/sparql ")
    for _ in range(3)
)
# # # # # # # # # # # # # # # # # # # # # # # #
# PAGES
# # # # # # # # # # # # # # # # # # # # # # # #
@app.route(" / ")
def home():
    """Render the home page.

    Runs the works query on the shared ``sparql`` client and passes two
    values to the ``home.html`` template:

    * ``results`` -- the decoded JSON response of the query, unchanged.
    * ``ImagesBanner`` -- the image values of the last twelve result rows
      that carry an image binding, in result order.
    """
    sparql.setQuery('''
SELECT ? work ? workLabel ? image ? date WHERE {
SERVICE wikibase : label { bd : serviceParam wikibase : language " [AUTO_LANGUAGE],en " . }
? work wdt : P1 wd : Q1 .
OPTIONAL { ? work p : P30 ? statement .
? statement ps : P30 ? image ;
pq : P54 wd : Q90 . }
OPTIONAL { ? work wdt : P13 ? date . }
FILTER ( ? work != wd : Q57 )
}
ORDER BY ( ? workLabel )
''')
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # The image binding is OPTIONAL in the query, so only rows that actually
    # have one contribute to the banner.
    banner_images = [
        publication[" image "][" value "]
        for publication in results[" results "][" bindings "]
        if " image " in publication
    ]
    # Only the last twelve images are shown in the banner.
    return render_template(' home.html ', results=results, ImagesBanner=banner_images[-12:])
@app.route(" /browsethearchive ")
def browsethearchive():
    """Render the archive listing.

    Runs the works query on the shared ``sparql`` client and hands the
    decoded JSON response to ``browsethearchive.html`` as ``results``. The
    template receives the raw response; no per-row processing happens here.
    """
    sparql.setQuery('''
SELECT ? work ? workLabel ? image ? date WHERE {
SERVICE wikibase : label { bd : serviceParam wikibase : language " [AUTO_LANGUAGE],en " . }
? work wdt : P1 wd : Q1 .
OPTIONAL { ? work p : P30 ? statement .
? statement ps : P30 ? image ;
pq : P54 wd : Q90 . }
OPTIONAL { ? work wdt : P13 ? date . }
FILTER ( ? work != wd : Q57 )
}
ORDER BY ( ? workLabel )
''')
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return render_template(' browsethearchive.html ', results=results)
@app.route(" /browsebycategory ")
def browsebycategory():
    """Render the category landing page; the template takes no variables."""
    page = render_template(' browsebycategory.html ')
    return page
##########################
# CATEGORIES TO BE BROWSED
#########################
######################### ARTIST INDEX
@app.route(" /artistsindex ")
def artistsindex():
    """Render the artists index.

    The query groups by ``?creators``, counts the distinct subjects linked
    to each one, excludes ``wd:Q82`` and orders by descending count, then by
    label. The decoded JSON response is passed to ``artistsindex.html`` as
    ``results``.
    """
    sparql.setQuery('''
SELECT ? creators ? creatorsLabel ? creatorsAltLabel ? creatorsDescription
WHERE {
{
SELECT ? creators ( COUNT ( DISTINCT ? a ) AS ? count ) WHERE {
? a ? prop ? creators .
? a wdt : P1 ? work .
BIND ( wdt : P9 AS ? prop ) .
BIND ( wd : Q1 AS ? work ) .
} GROUP BY ? creators
} .
SERVICE wikibase : label {
bd : serviceParam wikibase : language " en " .
}
FILTER ( ? creators != wd : Q82 )
}
ORDER BY DESC ( ? count ) ? creatorsLabel
''')
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # Keep the result dump available for debugging without writing the whole
    # response to stdout on every request.
    app.logger.debug("artistsindex results: %s", results)
    return render_template(' artistsindex.html ', results=results)
######################### PUBLISHERS INDEX
@app.route(" /publishersindex ")
def publishersindex():
    """Render the publishers index.

    The query groups by ``?publishers``, counts the distinct subjects linked
    to each one, excludes ``wd:Q83`` and ``wd:Q71`` and orders by descending
    count, then by label. The decoded JSON response is passed to
    ``publishersindex.html`` as ``results``.
    """
    query = '''
SELECT ? publishers ? publishersLabel ? publishersAltLabel ? publishersDescription WHERE {
{
SELECT ? publishers ( COUNT ( DISTINCT ? a ) AS ? count ) WHERE {
? a ? prop ? publishers .
? a wdt : P1 ? work .
BIND ( wdt : P10 AS ? prop ) .
BIND ( wd : Q1 AS ? work ) .
} GROUP BY ? publishers
} .
SERVICE wikibase : label {
bd : serviceParam wikibase : language " en " .
}
FILTER ( ? publishers != wd : Q83 )
FILTER ( ? publishers != wd : Q71 )
}
ORDER BY DESC ( ? count ) ? publishersLabel
'''
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    response = sparql.query()
    listing = response.convert()
    return render_template(' publishersindex.html ', results=listing)
######################### SELF PUBLISHED INDEX
@app.route(" /selfpublishedindex ")
def selfpublishedindex():
    """Render the self-published index.

    Selects the works whose ``P10`` value is ``wd:Q71`` (``wd:Q57`` is
    filtered out) and passes the decoded JSON response to
    ``selfpublishedindex.html`` as ``results``.
    """
    query = '''
SELECT ? work ? workLabel ? workAltLabel ? workDescription
WHERE {
SERVICE wikibase : label { bd : serviceParam wikibase : language " [AUTO_LANGUAGE],en " . }
? work wdt : P1 wd : Q1 ;
wdt : P10 wd : Q71 .
FILTER ( ? work != wd : Q57 )
}
'''
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    listing = sparql.query().convert()
    return render_template(' selfpublishedindex.html ', results=listing)
######################### ZINES INDEX
@app.route(" /zinesindex ")
def zinesindex():
    """Render the zines index.

    Selects the works whose ``P16`` value is ``wd:Q152`` (``wd:Q57`` is
    filtered out) and passes the decoded JSON response to
    ``zinesindex.html`` as ``results``.
    """
    query = '''
SELECT ? work ? workLabel ? workAltLabel ? workDescription
WHERE {
SERVICE wikibase : label { bd : serviceParam wikibase : language " [AUTO_LANGUAGE],en " . }
? work wdt : P1 wd : Q1 ;
wdt : P16 wd : Q152 .
FILTER ( ? work != wd : Q57 )
}
'''
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    listing = sparql.query().convert()
    return render_template(' zinesindex.html ', results=listing)
##########################
# DETAILED INDIVIDUAL PAGES
#########################
######################### ARTWORK
def _artwork_sparql_json(query):
    """Run *query* on the shared ``sparql`` client and return the decoded JSON."""
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def _artwork_wiki_paragraphs(url):
    """Fetch *url* and return the paragraphs of the page as one ``Markup`` string.

    The paragraphs are serialised one by one and joined. Wrapping the result
    list itself in ``Markup`` would render the list's repr (square brackets
    and commas) into the page.
    """
    # A timeout keeps an unresponsive wiki from blocking the request forever.
    html_content = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html_content, " lxml ")
    return Markup("".join(str(paragraph) for paragraph in soup.find_all(" p ")))


@app.route(" /artwork ", methods=[' GET '])
def artwork():
    """Render the detail page of one artwork.

    The item id is read from the query string. It is spliced into SPARQL
    queries and wiki URLs, so anything that is not ``Q`` followed by digits
    is rejected with a 400 response instead of being sent on.

    For a valid id the view collects, in this order: the item introduction,
    its images, contributors, date, publishers, description-page metadata,
    the description text from the wiki, exhibition-page metadata, the
    exhibition text from the wiki, the copies held in collections and the
    related works. Everything is passed to ``artwork.html``.
    """
    artwork_id = request.args.get(' id ')
    if artwork_id is None or not re.fullmatch(r"Q\d+", artwork_id):
        return make_response("Invalid or missing id", 400)

    # Artwork intro / top of the page
    artworkintro = _artwork_sparql_json('''
SELECT ? work ? workLabel ? workDescription ? itemtypeLabel
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P1 ? itemtype .
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
}
''')

    # Image(s)
    artworkimages = _artwork_sparql_json('''
SELECT ? image ? depictsLabel ? licenseLabel
WHERE
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P30 ? image .
OPTIONAL { ? work p : P30 ? statement2 .
? statement2 ps : P30 ? image ;
pq : P54 ? depicts ;
pq : P56 ? license . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
}
''')

    # --- right top: contributors
    artworkcontributors = _artwork_sparql_json('''
SELECT DISTINCT ? creators ? creatorsLabel ( group_concat ( ? creatorRolesLabel ; separator = " ; " ) as ? role )
WHERE
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P9 ? creators .
OPTIONAL { ? work p : P9 ? statement1 .
? statement1 ps : P9 ? creators ;
pq : P49 ? creatorRoles . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " .
? creators rdfs : label ? creatorsLabel .
? creatorRoles rdfs : label ? creatorRolesLabel . }
}
GROUP BY ? creators ? creatorsLabel
ORDER BY ? creatorsLabel
''')

    # --- right top: date
    artworkdate = _artwork_sparql_json('''
SELECT ? date ? sourceLabel
WHERE
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P13 ? date .
OPTIONAL { ? work p : P13 ? statement1 .
? statement1 ps : P13 ? date ;
pq : P50 ? source . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
}
''')

    # --- right top: publishers
    # The qualifier is bound to ?publishersRoles so that it matches the
    # variable the label service and group_concat read; with a different
    # name the ?role column is always empty.
    artworkpublisher = _artwork_sparql_json('''
SELECT DISTINCT ? publishers ? publishersLabel ( group_concat ( ? publishersRolesLabel ; separator = " ; " ) as ? role )
WHERE
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P10 ? publishers .
OPTIONAL { ? work p : P10 ? statement1 .
? statement1 ps : P10 ? publishers ;
pq : P49 ? publishersRoles . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " .
? publishers rdfs : label ? publishersLabel .
? publishersRoles rdfs : label ? publishersRolesLabel . }
}
GROUP BY ? publishers ? publishersLabel
ORDER BY ? publishersLabel
''')
    app.logger.debug("artwork publishers: %s", artworkpublisher)

    # --- right middle: description metadata
    artworkdescriptiondata = _artwork_sparql_json('''
SELECT ? accessURLdescriptionPage ? authordescriptionPage ? authordescriptionPageLabel ? datedescriptionPage ? sourcedescriptionPage ? sourcedescriptionPageLabel
WHERE
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P65 ? descriptionPage .
OPTIONAL { ? descriptionPage wdt : P4 ? accessURLdescriptionPage . }
OPTIONAL { ? descriptionPage wdt : P9 ? authordescriptionPage . }
OPTIONAL { ? descriptionPage wdt : P13 ? datedescriptionPage . }
OPTIONAL { ? descriptionPage wdt : P50 ? sourcedescriptionPage . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
}
''')

    # Description text of the requested item, taken from the wiki.
    # NOTE(review): the page is requested even when the query above returned
    # no description page; whatever the wiki answers is rendered as-is.
    artworkdescriptioncontenturl = " https://daap.bannerrepeater.org/w/index.php?title=Description:" + artwork_id + "&action=render "
    artworkdescriptiontext = _artwork_wiki_paragraphs(artworkdescriptioncontenturl)
    app.logger.debug("artwork description text: %s", artworkdescriptiontext)

    # --- right bottom: exhibition metadata of the requested item
    artworkexhibitiondata = _artwork_sparql_json('''
SELECT ? accessURLexhibitionHisPage ? authorexhibitionHisPageLabel ? dateexhibitionHisPage ? sourceexhibitionHisPage
WHERE
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P66 ? exhibitionHisPage .
OPTIONAL { ? exhibitionHisPage wdt : P4 ? accessURLexhibitionHisPage . }
OPTIONAL { ? exhibitionHisPage wdt : P9 ? authorexhibitionHisPage . }
OPTIONAL { ? exhibitionHisPage wdt : P13 ? dateexhibitionHisPage . }
OPTIONAL { ? exhibitionHisPage wdt : P50 ? sourceexhibitionHisPage . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
}
''')

    # Exhibition history text, taken from the wiki.
    artworkexhibitioncontenturl = " https://daap.bannerrepeater.org/w/index.php?title=History: " + artwork_id + " &action=render "
    artworkexhibitiontext = _artwork_wiki_paragraphs(artworkexhibitioncontenturl)

    # --- bottom: copies in collections
    copiesincollection = _artwork_sparql_json('''
SELECT ? copiesCollections ? copiesCollectionsLabel ? collection ? collectionLabel ? image
WHERE
{
VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P43 ? copiesCollections .
? copiesCollections wdt : P47 ? collection .
OPTIONAL { ? collection wdt : P30 ? image . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
} ''')

    # --- bottom: related works
    relatedworks = _artwork_sparql_json('''
SELECT ? relatedWorks ? relatedWorksLabel ? image ? daterelatedWorks
WHERE {
{
SELECT ? relatedWorks ? relatedWorksLabel ( SAMPLE ( ? daterelatedWorks ) AS ? daterelatedWorks )
WHERE
{ VALUES ? work { wd : ''' + artwork_id + ''' }
? work wdt : P44 ? relatedWorks .
OPTIONAL { ? relatedWorks wdt : P13 ? daterelatedWorks . }
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
}
GROUP BY ? relatedWorks ? relatedWorksLabel
ORDER BY ? relatedWorksLabel
}
OPTIONAL { ? relatedWorks wdt : P30 ? image . }
} ''')

    # The exhibition values are passed along with the rest so the work done
    # to fetch them is not thrown away; a template that does not reference
    # them simply ignores the extra variables.
    return render_template(
        ' artwork.html ',
        artwork_id=artwork_id,
        artworkintro=artworkintro,
        artworkimages=artworkimages,
        artworkcontributors=artworkcontributors,
        artworkdate=artworkdate,
        artworkpublisher=artworkpublisher,
        artworkdescriptiondata=artworkdescriptiondata,
        artworkdescriptiontext=artworkdescriptiontext,
        artworkexhibitiondata=artworkexhibitiondata,
        artworkexhibitiontext=artworkexhibitiontext,
        copiesincollection=copiesincollection,
        relatedworks=relatedworks,
    )
######################### PERSON - FOR NOW THIS IS A MESS
@app.route(" /person ", methods=[' GET '])
def person():
    """Render the detail page of one person.

    The item id is read from the query string. It is spliced into SPARQL
    queries, so anything that is not ``Q`` followed by digits is rejected
    with a 400 response.

    Three queries are run, each on its own client:

    * ``sparql``  -- every direct claim of the item (``person_details``).
    * ``sparql2`` -- works linked to the item through ``P9``
      (``person_creatorof``).
    * ``sparql3`` -- works linked to the item through ``P10``
      (``person_publisherof``).

    The three decoded JSON responses and the id are passed to
    ``person.html``.
    """
    person_id = request.args.get(' id ')
    if person_id is None or not re.fullmatch(r"Q\d+", person_id):
        return make_response("Invalid or missing id", 400)

    sparql.setQuery('''
SELECT ? item ? itemLabel ? itemDescription ? propLabel ? b ? bLabel
WHERE
{
VALUES ? item { wd : ''' + person_id + ''' }
? item ? a ? b .
SERVICE wikibase : label { bd : serviceParam wikibase : language " en " . }
? prop wikibase : directClaim ? a .
}
''')
    sparql.setReturnFormat(JSON)
    person_details = sparql.query().convert()
    app.logger.debug("person details: %s", person_details)

    sparql2.setQuery('''
SELECT ? work ? workLabel ? image ? date
WHERE {
SERVICE wikibase : label { bd : serviceParam wikibase : language " [AUTO_LANGUAGE],en " . }
? work wdt : P1 wd : Q1 .
? work wdt : P9 wd : ''' + person_id + ''' .
OPTIONAL { ? work wdt : P30 ? image . }
OPTIONAL { ? work wdt : P13 ? date . }
FILTER ( ? work != wd : Q57 )
}
ORDER BY ( ? workLabel )
''')
    sparql2.setReturnFormat(JSON)
    person_creatorof = sparql2.query().convert()

    sparql3.setQuery('''
SELECT ? work ? workLabel ? image ? date
WHERE {
SERVICE wikibase : label { bd : serviceParam wikibase : language " [AUTO_LANGUAGE],en " . }
? work wdt : P1 wd : Q1 .
? work wdt : P10 wd : ''' + person_id + ''' .
OPTIONAL { ? work wdt : P30 ? image . }
OPTIONAL { ? work wdt : P13 ? date . }
FILTER ( ? work != wd : Q57 )
}
ORDER BY ( ? workLabel )
''')
    sparql3.setReturnFormat(JSON)
    # Execute the client that holds the P10 query. Running sparql2 here
    # would repeat the P9 query and return the creator list a second time.
    person_publisherof = sparql3.query().convert()

    return render_template(
        " person.html ",
        person_id=person_id,
        person_creatorof=person_creatorof,
        person_publisherof=person_publisherof,
        person_details=person_details,
    )
######################### ORGANISATION
@app.route(" /organisation ", methods=[' GET '])
def organisation():
    """Render the organisation page; the template takes no variables."""
    # The id is read from the query string but nothing uses it yet.
    requested_id = request.args.get(' id ')
    page = render_template(" organisation.html ")
    return page
#########################
# PAGES FROM WIKI
#########################
######################### SEARCH TOOLS
@app.route(" /searchtools ")
def searchtools():
    """Render the search tools page; the template takes no variables."""
    page = render_template(' searchtools.html ')
    return page
######################### ABOUT
@app.route(" /about ")
def about():
    """Render the About page from the content of the wiki's About page.

    The wiki page is fetched and parsed, and the element found by the
    lookup below is wrapped in ``Markup`` and passed to ``about.html`` as
    ``text``.
    """
    url = " https://daap.bannerrepeater.org/w/index.php?title=About&action=render "
    # A timeout keeps an unresponsive wiki from blocking the request forever.
    html_content = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html_content, " lxml ")
    # TODO: adapt the image paths -- replace each <img> src with the href of
    # its enclosing <a> and remove the <a>.
    text = Markup(soup.find(" html "))
    return render_template(' about.html ', text=text)
######################### TUTORIAL
@app.route(" /tutorials ")
def tutorials():
    """Render the Tutorials page from the content of the wiki's Tutorials page.

    The wiki page is fetched and parsed, and the element found by the
    lookup below is wrapped in ``Markup`` and passed to ``tutorials.html``
    as ``text``.
    """
    url = " https://daap.bannerrepeater.org/w/index.php?title=Tutorials&action=render "
    # A timeout keeps an unresponsive wiki from blocking the request forever.
    html_content = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html_content, " lxml ")
    text = Markup(soup.find(" html "))
    # Hand the fetched content to the template, as the other wiki-backed
    # pages do; without it the download above is wasted.
    return render_template(' tutorials.html ', text=text)
######################### UPLOAD
@app.route(" /upload ")
def upload():
    """Render the Upload page from the content of the wiki's Upload page.

    The wiki page is fetched and parsed, and the element found by the
    lookup below is wrapped in ``Markup`` and passed to ``upload.html`` as
    ``text``.
    """
    url = " https://daap.bannerrepeater.org/w/index.php?title=Upload&action=render "
    # A timeout keeps an unresponsive wiki from blocking the request forever.
    html_content = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html_content, " lxml ")
    text = Markup(soup.find(" html "))
    return render_template(' upload.html ', text=text)
# #################### CASE STUDY
@app.route(" /casestudy ")
def casestudy():
    """Render the case study page from the content of the wiki page below.

    The wiki page is fetched and parsed, and the element found by the
    lookup below is wrapped in ``Markup`` and passed to ``casestudy.html``
    as ``text``.
    """
    url = " https://daap.bannerrepeater.org/w/index.php?title=Carolee_Schneemann_case_study&action=render "
    # A timeout keeps an unresponsive wiki from blocking the request forever.
    html_content = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html_content, " lxml ")
    text = Markup(soup.find(" html "))
    return render_template(' casestudy.html ', text=text)
# #################### CODE OF CONDUCT
@app.route(" /codeofconduct ")
def codeofconduct():
    """Render the Code of Conduct page from the content of the wiki page below.

    The wiki page is fetched and parsed, and the element found by the
    lookup below is wrapped in ``Markup`` and passed to
    ``codeofconduct.html`` as ``text``.
    """
    url = " https://daap.bannerrepeater.org/w/index.php?title=Code_of_Conduct&action=render "
    # A timeout keeps an unresponsive wiki from blocking the request forever.
    html_content = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html_content, " lxml ")
    text = Markup(soup.find(" html "))
    return render_template(' codeofconduct.html ', text=text)
######################### LOGIN
#Goes to wikibase page
# ###################
# TEST
# Experimental request that runs when this module is imported: it asks the
# wiki API to parse one page and extracts the text of its first paragraph.
# NOTE(review): nothing visible in this file reads intro_text; consider
# moving this out of import time.
response = requests.get(
    ' https://daap.bannerrepeater.org/w/api.php ',
    params={
        ' action ': ' parse ',
        ' page ': ' Test ',
        ' format ': ' json ',
    },
    # Without a timeout an unresponsive wiki would block start-up forever.
    timeout=10,
).json()
raw_html = response[' parse '][' text '][' * ']
document = html.document_fromstring(raw_html)
# A page without any paragraph must not abort the import with IndexError.
paragraphs = document.xpath(' //p ')
first_p = paragraphs[0] if paragraphs else None
# Compare with None explicitly: element truthiness is not a presence test.
intro_text = first_p.text_content() if first_p is not None else ""
# print(intro_text)
# ALL NAME
# https://daap.bannerrepeater.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces|namespacealiases
# replace or insert tags
# https://stackoverflow.com/questions/2073541/search-and-replace-in-html-with-beautifulsoup