# encoding=utf8
"""Flask front end for a Wikibase-backed archive.

Pages are built from two sources visible in this module:

* SPARQL queries sent to the query service at ``SPARQL_ENDPOINT``;
* MediaWiki pages fetched with ``action=render`` from ``WIKI_INDEX_URL``
  and embedded into the templates as pre-rendered HTML.
"""

# # # # # # # # # # # # # # # # # # # # # # # #
# REQUIREMENTS
# # # # # # # # # # # # # # # # # # # # # # # #
# NOTE(review): ``Markup`` is kept as a flask import because that is what the
# file already relies on; recent Flask releases expect it to come from
# ``markupsafe`` instead -- confirm against the deployed Flask version.
from flask import (
    send_file,
    Flask,
    Response,
    url_for,
    render_template,
    Markup,
    jsonify,
    redirect,
    request,
    flash,
    session,
    make_response,
    abort,
)
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import json
# import pandas as pd

# ##### IMPORTS FOR TEST WIKIPAGE
from lxml import html
from bs4 import BeautifulSoup
import re

# # # # # # # # # # # # # # # # # # # # # # # #
# GETTING STARTED
# # # # # # # # # # # # # # # # # # # # # # # #
app = Flask(__name__, static_url_path='', static_folder="static", template_folder="templates")
app.jinja_env.add_extension('jinja2.ext.loopcontrols')

# # # # # # # # # # # # # # # # # # # # # # # #
# GETTING WIKIBASE DATA
# # # # # # # # # # # # # # # # # # # # # # # #
SPARQL_ENDPOINT = "https://query.daap.bannerrepeater.org/proxy/wdqs/bigdata/namespace/wdq/sparql"
WIKI_INDEX_URL = "https://daap.bannerrepeater.org/w/index.php"
WIKI_API_URL = "https://daap.bannerrepeater.org/w/api.php"

# Seconds to wait on the wiki before giving up; without a timeout a stalled
# upstream server would block the request handler indefinitely.
REQUEST_TIMEOUT = 10

# Text shown on an artwork page when no description page is linked.
DESCRIPTION_NOT_AVAILABLE = "\nInformation not available\n"

sparql = SPARQLWrapper(SPARQL_ENDPOINT)
sparql2 = SPARQLWrapper(SPARQL_ENDPOINT)
sparql3 = SPARQLWrapper(SPARQL_ENDPOINT)

# Wikibase item identifiers accepted from the query string ("Q" + digits).
_ENTITY_ID_RE = re.compile(r'Q\d+')


def _run_query(query, client=None):
    """Run *query* on a SPARQL client and return the decoded JSON result.

    ``client`` defaults to the module-level ``sparql`` wrapper.
    """
    client = sparql if client is None else client
    client.setQuery(query)
    client.setReturnFormat(JSON)
    return client.query().convert()


def _require_entity_id():
    """Return the ``id`` query parameter, aborting with HTTP 400 if invalid.

    The value is interpolated into SPARQL text by the callers, so anything
    other than a plain ``Q<digits>`` identifier (including a missing
    parameter) is rejected instead of being sent to the query service.
    """
    entity_id = request.args.get('id', '')
    if not _ENTITY_ID_RE.fullmatch(entity_id):
        abort(400, description="Query parameter 'id' must look like Q123.")
    return entity_id


def _fetch_wiki_fragment(title, tag="html", attrs=None):
    """Fetch wiki page *title* with ``action=render`` and return one element.

    The first ``tag`` element matching ``attrs`` is returned as ``Markup`` so
    templates can embed it unescaped. When no such element exists an empty
    ``Markup`` is returned rather than the literal text "None".
    """
    url = WIKI_INDEX_URL + "?title=" + title + "&action=render"
    html_content = requests.get(url, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(html_content, "lxml")
    element = soup.find(tag, attrs or {})
    if element is None:
        return Markup("")
    return Markup(str(element))


# # # # # # # # # # # # # # # # # # # # # # # #
# PAGES
# # # # # # # # # # # # # # # # # # # # # # # #
@app.route("/")
def home():
    """Render the home page with up to 12 works and their banner images."""
    results = _run_query('''
        SELECT ?work ?workLabel ?image ?date ?dateadded
        WHERE {
            SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
            ?work wdt:P1 wd:Q1;
                  wdt:P87 ?dateadded.
            ?work p:P30 ?statement.
            ?statement ps:P30 ?image;
                       pq:P54 wd:Q90.
            ?work wdt:P13 ?date.
            FILTER(?work != wd:Q57)
        }
        ORDER BY (?dateadded)
        LIMIT 12
    ''')
    # Collect the image URL of every row that has one, keeping the last 12.
    images_banner = [
        publication["image"]["value"]
        for publication in results["results"]["bindings"]
        if "image" in publication
    ][-12:]
    return render_template('home.html', results=results, ImagesBanner=images_banner)


@app.route("/browsethearchive")
def browsethearchive():
    """Render the full list of works ordered by label."""
    results = _run_query('''
        SELECT ?work ?workLabel ?image ?date
        WHERE {
            SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
            ?work wdt:P1 wd:Q1.
            OPTIONAL { ?work p:P30 ?statement.
                       ?statement ps:P30 ?image;
                                  pq:P54 wd:Q90.}
            OPTIONAL { ?work wdt:P13 ?date. }
            FILTER(?work != wd:Q57)
        }
        ORDER BY (?workLabel)
    ''')
    return render_template('browsethearchive.html', results=results)


@app.route("/browsebycategory")
def browsebycategory():
    """Render the static category overview page."""
    return render_template('browsebycategory.html')


##########################
# CATEGORIES TO BE BROWSED
#########################

######################### ARTIST INDEX
@app.route("/artistsindex")
def artistsindex():
    """Render the index of creators (P9), most-referenced first."""
    results = _run_query('''
        SELECT ?creators ?creatorsLabel ?creatorsAltLabel ?creatorsDescription
        WHERE {
            {
                SELECT ?creators (COUNT(DISTINCT ?a) AS ?count)
                WHERE {
                    ?a ?prop ?creators .
                    ?a wdt:P1 ?work .
                    BIND (wdt:P9 AS ?prop) .
                    BIND (wd:Q1 AS ?work) .
                }
                GROUP BY ?creators
            } .
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
            FILTER (?creators !=wd:Q82)
        }
        ORDER BY DESC(?count) ?creatorsLabel
    ''')
    app.logger.debug("artistsindex results: %s", results)
    return render_template('artistsindex.html', results=results)


######################### PUBLISHERS INDEX
@app.route("/publishersindex")
def publishersindex():
    """Render the index of publishers (P10), most-referenced first."""
    results = _run_query('''
        SELECT ?publishers ?publishersLabel ?publishersAltLabel ?publishersDescription
        WHERE {
            {
                SELECT ?publishers (COUNT(DISTINCT ?a) AS ?count)
                WHERE {
                    ?a ?prop ?publishers .
                    ?a wdt:P1 ?work .
                    BIND (wdt:P10 AS ?prop) .
                    BIND (wd:Q1 AS ?work) .
                }
                GROUP BY ?publishers
            } .
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
            FILTER(?publishers != wd:Q83)
            FILTER(?publishers != wd:Q71)
        }
        ORDER BY DESC(?count) ?publishersLabel
    ''')
    return render_template('publishersindex.html', results=results)


######################### SELF PUBLISHED INDEX
@app.route("/selfpublishedindex")
def selfpublishedindex():
    """Render the works whose publisher (P10) is item Q71."""
    results = _run_query('''
        SELECT ?work ?workLabel ?image ?date
        WHERE {
            SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
            ?work wdt:P1 wd:Q1;
                  wdt:P10 wd:Q71.
            OPTIONAL {?work p:P30 ?statement.
                      ?statement ps:P30 ?image;
                                 pq:P54 wd:Q90.}
            OPTIONAL { ?work wdt:P13 ?date. }
            FILTER(?work != wd:Q57)
        }
    ''')
    return render_template('selfpublishedindex.html', results=results)


######################### ZINES INDEX
@app.route("/zinesindex")
def zinesindex():
    """Render the zines index.

    NOTE(review): the query is identical to the one in
    ``selfpublishedindex`` (publisher Q71); confirm whether a zine-specific
    filter was intended.
    """
    results = _run_query('''
        SELECT ?work ?workLabel ?image ?date
        WHERE {
            SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
            ?work wdt:P1 wd:Q1;
                  wdt:P10 wd:Q71.
            OPTIONAL {?work p:P30 ?statement.
                      ?statement ps:P30 ?image;
                                 pq:P54 wd:Q90.}
            OPTIONAL { ?work wdt:P13 ?date. }
            FILTER(?work != wd:Q57)
        }
    ''')
    app.logger.debug("zinesindex results: %s", results)
    return render_template('zinesindex.html', results=results)


##########################
# DETAILED INDIVIDUAL PAGES
#########################

######################### ARTWORK
@app.route("/artwork", methods=['GET'])
def artwork():
    """Render the detail page of the work named by the ``id`` parameter.

    Responds with HTTP 400 when ``id`` is missing or is not a ``Q<digits>``
    identifier.
    """
    artwork_id = _require_entity_id()

    # Artwork Intro / Top of the page
    artworkintro = _run_query('''
        SELECT ?work ?workLabel ?workDescription ?itemtypeLabel
        {
            VALUES ?work {wd:%s}
            ?work wdt:P1 ?itemtype.
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
        }
    ''' % artwork_id)

    # Image(s)
    artworkimages = _run_query('''
        SELECT ?image ?depictsLabel ?licenseLabel
        WHERE {
            VALUES ?work {wd:%s}
            ?work wdt:P30 ?image.
            OPTIONAL { ?work p:P30 ?statement2.
                       ?statement2 ps:P30 ?image;
                                   pq:P54 ?depicts;
                                   pq:P56 ?license.}
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en".}
        }
    ''' % artwork_id)
    app.logger.debug("artwork images: %s", artworkimages)

    ######### Right top
    # contributors
    artworkcontributors = _run_query('''
        SELECT DISTINCT ?creators ?creatorsLabel
               (group_concat(?creatorRolesLabel; separator="; ") as ?role)
        WHERE {
            VALUES ?work {wd:%s}
            ?work wdt:P9 ?creators.
            OPTIONAL { ?work p:P9 ?statement1.
                       ?statement1 ps:P9 ?creators;
                                   pq:P49 ?creatorRoles. }
            SERVICE wikibase:label {
                bd:serviceParam wikibase:language "en".
                ?creators rdfs:label ?creatorsLabel.
                ?creatorRoles rdfs:label ?creatorRolesLabel.
            }
        }
        GROUP BY ?creators ?creatorsLabel
        ORDER BY ?creatorsLabel
    ''' % artwork_id)

    # date
    artworkdate = _run_query('''
        SELECT ?date ?sourceLabel
        WHERE {
            VALUES ?work {wd:%s}
            ?work wdt:P13 ?date.
            OPTIONAL { ?work p:P13 ?statement1.
                       ?statement1 ps:P13 ?date;
                                   pq:P50 ?source. }
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en".}
        }
    ''' % artwork_id)

    # publishers
    # The role qualifier is bound to ?publishersRoles so that it matches the
    # variable labelled and aggregated below; it was previously bound to
    # ?creatorRoles, which left ?role empty for every publisher.
    artworkpublisher = _run_query('''
        SELECT DISTINCT ?publishers ?publishersLabel
               (group_concat(?publishersRolesLabel; separator="; ") as ?role)
        WHERE {
            VALUES ?work {wd:%s}
            ?work wdt:P10 ?publishers.
            OPTIONAL { ?work p:P10 ?statement1.
                       ?statement1 ps:P10 ?publishers;
                                   pq:P49 ?publishersRoles. }
            SERVICE wikibase:label {
                bd:serviceParam wikibase:language "en".
                ?publishers rdfs:label ?publishersLabel.
                ?publishersRoles rdfs:label ?publishersRolesLabel.
            }
        }
        GROUP BY ?publishers ?publishersLabel
        ORDER BY ?publishersLabel
    ''' % artwork_id)

    ##### right middle
    # description
    artworkdescriptiondata = _run_query('''
        SELECT ?accessURLdescriptionPage ?authordescriptionPage ?authordescriptionPageLabel
               ?datedescriptionPage ?sourcedescriptionPage ?sourcedescriptionPageLabel
        WHERE {
            VALUES ?work {wd:%s}
            ?work wdt:P65 ?descriptionPage.
            OPTIONAL { ?descriptionPage wdt:P4 ?accessURLdescriptionPage. }
            OPTIONAL { ?descriptionPage wdt:P9 ?authordescriptionPage. }
            OPTIONAL { ?descriptionPage wdt:P13 ?datedescriptionPage. }
            OPTIONAL { ?descriptionPage wdt:P50 ?sourcedescriptionPage. }
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
        }
    ''' % artwork_id)

    # Get the description text or say there isn't any. Stays None when the
    # query returned no rows; with several rows the last one wins.
    artworkdescriptiontext = None
    for binding in artworkdescriptiondata['results']['bindings']:
        if "accessURLdescriptionPage" not in binding:
            app.logger.debug("url for description absent")
            artworkdescriptiontext = Markup(DESCRIPTION_NOT_AVAILABLE)
            continue

        access_url = binding["accessURLdescriptionPage"]["value"]
        app.logger.debug("url for description present: %s", access_url)
        # The numeric part after ":Q" identifies the Description page.
        id_match = re.search(r':Q(.*)', access_url, re.DOTALL)
        if id_match is None:
            # The URL does not follow the expected pattern; treat it as a
            # missing description instead of failing the whole page.
            artworkdescriptiontext = Markup(DESCRIPTION_NOT_AVAILABLE)
            continue
        artworkdescriptiontext = _fetch_wiki_fragment(
            "Description:Q" + id_match.group(1),
            "div",
            {"class": "mw-parser-output"},
        )

    ############ right bottom LATER
    # exhibitions
    # TODO(review): the work is still hard-coded to wd:Q57 and neither result
    # below is handed to the template yet; left as found until the
    # exhibition section is finished.
    artworkexhibitiondata = _run_query('''
        SELECT ?accessURLexhibitionHisPage ?authorexhibitionHisPageLabel
               ?dateexhibitionHisPage ?sourceexhibitionHisPage
        WHERE {
            VALUES ?work {wd:Q57}
            ?work wdt:P66 ?exhibitionHisPage.
            OPTIONAL { ?exhibitionHisPage wdt:P4 ?accessURLexhibitionHisPage. }
            OPTIONAL { ?exhibitionHisPage wdt:P9 ?authorexhibitionHisPage. }
            OPTIONAL { ?exhibitionHisPage wdt:P13 ?dateexhibitionHisPage. }
            OPTIONAL { ?exhibitionHisPage wdt:P50 ?sourceexhibitionHisPage. }
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
        }
    ''')
    # exhibition history content from wiki
    artworkexhibitiontext = _fetch_wiki_fragment(
        "History:" + artwork_id, "div", {"class": "mw-parser-output"}
    )

    ############## bottom
    # copies in collection
    copiesincollection = _run_query('''
        SELECT ?copiesCollections ?copiesCollectionsLabel ?collection ?collectionLabel ?image
        WHERE {
            VALUES ?work {wd:%s}
            ?work wdt:P43 ?copiesCollections.
            ?copiesCollections wdt:P47 ?collection.
            OPTIONAL { ?collection wdt:P30 ?image. }
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
        }
    ''' % artwork_id)

    # related works
    relatedworks = _run_query('''
        SELECT ?relatedWorks ?relatedWorksLabel ?image ?daterelatedWorks
        WHERE {
            {
                SELECT ?relatedWorks ?relatedWorksLabel
                       (SAMPLE(?daterelatedWorks) AS ?daterelatedWorks)
                WHERE {
                    VALUES ?work {wd:%s}
                    ?work wdt:P44 ?relatedWorks.
                    OPTIONAL {?relatedWorks wdt:P13 ?daterelatedWorks.}
                    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
                }
                GROUP BY ?relatedWorks ?relatedWorksLabel
                ORDER BY ?relatedWorksLabel
            }
            OPTIONAL {?relatedWorks wdt:P30 ?image.}
        }
    ''' % artwork_id)

    return render_template(
        'artwork.html',
        artwork_id=artwork_id,
        artworkintro=artworkintro,
        artworkimages=artworkimages,
        artworkcontributors=artworkcontributors,
        artworkdate=artworkdate,
        artworkpublisher=artworkpublisher,
        artworkdescriptiondata=artworkdescriptiondata,
        artworkdescriptiontext=artworkdescriptiontext,
        copiesincollection=copiesincollection,
        relatedworks=relatedworks,
    )


######################### PERSON - FOR NOW THIS IS A MESS
@app.route("/person", methods=['GET'])
def person():
    """Render the page of the person named by the ``id`` parameter.

    Responds with HTTP 400 when ``id`` is missing or is not a ``Q<digits>``
    identifier.
    """
    person_id = _require_entity_id()

    # Every direct claim of the item, with property and value labels.
    person_details = _run_query('''
        SELECT ?item ?itemLabel ?itemDescription ?propLabel ?b ?bLabel
        WHERE {
            VALUES ?item {wd:%s}
            ?item ?a ?b.
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
            ?prop wikibase:directClaim ?a .
        }
    ''' % person_id, sparql)
    app.logger.debug("person details: %s", person_details)

    # Works the person created (P9).
    person_creatorof = _run_query('''
        SELECT ?work ?workLabel ?image ?date
        WHERE {
            SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
            ?work wdt:P1 wd:Q1.
            ?work wdt:P9 wd:%s .
            OPTIONAL { ?work wdt:P30 ?image. }
            OPTIONAL { ?work wdt:P13 ?date. }
            FILTER(?work != wd:Q57)
        }
        ORDER BY (?workLabel)
    ''' % person_id, sparql2)

    # Works the person published (P10). This runs on sparql3, the client the
    # query is set on; it used to be executed through sparql2, which returned
    # the creator results a second time.
    person_publisherof = _run_query('''
        SELECT ?work ?workLabel ?image ?date
        WHERE {
            SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
            ?work wdt:P1 wd:Q1.
            ?work wdt:P10 wd:%s .
            OPTIONAL { ?work wdt:P30 ?image. }
            OPTIONAL { ?work wdt:P13 ?date. }
            FILTER(?work != wd:Q57)
        }
        ORDER BY (?workLabel)
    ''' % person_id, sparql3)

    return render_template(
        "person.html",
        person_id=person_id,
        person_creatorof=person_creatorof,
        person_publisherof=person_publisherof,
        person_details=person_details,
    )


######################### ORGANISATION
@app.route("/organisation", methods=['GET'])
def organisation():
    """Render the organisation page (the ``id`` parameter is not used yet)."""
    return render_template("organisation.html")


#########################
# PAGES FROM WIKI
#########################

######################### SEARCH TOOLS
@app.route("/searchtools")
def searchtools():
    """Render the static search tools page."""
    return render_template('searchtools.html')


######################### ABOUT
@app.route("/about")
def about():
    """Render the wiki page "About" inside the site template."""
    text = _fetch_wiki_fragment("About")
    return render_template('about.html', text=text)


######################### TUTORIAL
@app.route("/tutorials")
def tutorials():
    """Render the wiki page "Tutorials" inside the site template."""
    text = _fetch_wiki_fragment("Tutorials")
    # The fetched page is now passed on like in the other wiki-backed pages;
    # it used to be fetched and then dropped.
    return render_template('tutorials.html', text=text)


######################### UPLOAD
@app.route("/upload")
def upload():
    """Render the wiki page "Upload" inside the site template."""
    text = _fetch_wiki_fragment("Upload")
    return render_template('upload.html', text=text)


# #################### CASE STUDY
@app.route("/casestudy")
def casestudy():
    """Render the wiki page "Carolee_Schneemann_case_study"."""
    text = _fetch_wiki_fragment("Carolee_Schneemann_case_study")
    return render_template('casestudy.html', text=text)


# #################### CODE OF CONDUCT
@app.route("/codeofconduct")
def codeofconduct():
    """Render the wiki page "Code_of_Conduct" inside the site template."""
    text = _fetch_wiki_fragment("Code_of_Conduct")
    return render_template('codeofconduct.html', text=text)


######################### LOGIN
# Goes to wikibase page

# ###################
# TEST
# NOTE(review): this request runs at import time, so the module cannot be
# loaded while the wiki is unreachable or when the page has no <p> element;
# consider moving it behind a function once it is no longer needed for testing.
response = requests.get(
    WIKI_API_URL,
    params={
        'action': 'parse',
        'page': 'Test',
        'format': 'json',
    },
    timeout=REQUEST_TIMEOUT,
).json()
raw_html = response['parse']['text']['*']
document = html.document_fromstring(raw_html)
first_p = document.xpath('//p')[0]
intro_text = first_p.text_content()
# print(intro_text)

# ALL NAME
# https://daap.bannerrepeater.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces|namespacealiases

# replace or insert tags
# https://stackoverflow.com/questions/2073541/search-and-replace-in-html-with-beautifulsoup