DAAP_interface/daapinterface.py

# encoding=utf8

# # # # # # # # # # # # # # # # # # # # # # # # 
# REQUIREMENTS
# # # # # # # # # # # # # # # # # # # # # # # # 
from flask import send_file, Flask, Response, url_for, render_template, Markup, jsonify, redirect, request, flash, session, make_response
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import json
# import pandas as pd
# ##### IMPORTS FOR TEST WIKIPAGE
from lxml import html
from bs4 import BeautifulSoup
import re


# # # # # # # # # # # # # # # # # # # # # # # # 
# GETTING STARTED
# # # # # # # # # # # # # # # # # # # # # # # # 
# The Flask application object used by every @app.route decorator below.
# static_url_path='' serves the files of the "static" folder from the URL root;
# templates are loaded from the "templates" folder.
app = Flask(__name__, static_url_path='', static_folder="static", template_folder="templates")
# Register Jinja2's loopcontrols extension for the templates.
app.jinja_env.add_extension('jinja2.ext.loopcontrols')


# # # # # # # # # # # # # # # # # # # # # # # # 
# GETTING WIKIBASE DATA
# # # # # # # # # # # # # # # # # # # # # # # # 

# Single query-service endpoint shared by the three wrappers below.
_WDQS_ENDPOINT = "https://query.daap.bannerrepeater.org/proxy/wdqs/bigdata/namespace/wdq/sparql"

# NOTE(review): these wrappers are module-level objects whose query is set per
# request by the views; confirm the deployment does not serve requests
# concurrently from one process, or create a wrapper per request instead.
sparql, sparql2, sparql3 = (SPARQLWrapper(_WDQS_ENDPOINT) for _ in range(3))


# # # # # # # # # # # # # # # # # # # # # # # # 
# PAGES
# # # # # # # # # # # # # # # # # # # # # # # # 
@app.route("/")
def home():
    """Render the home page.

    Runs a SPARQL query for at most 12 works (ordered by ``?dateadded``) and
    passes the raw query result plus the list of their image URLs to
    ``home.html``.
    """
    sparql.setQuery('''
    SELECT ?work ?workLabel ?image ?date ?dateadded WHERE {
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    ?work wdt:P1 wd:Q1;
    wdt:P87 ?dateadded.
    ?work p:P30 ?statement.
              ?statement ps:P30 ?image;
                         pq:P54 wd:Q90.
    ?work wdt:P13 ?date.
    FILTER(?work != wd:Q57)
    }
    ORDER BY (?dateadded)
    LIMIT 12
    ''')
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # Only the image URLs are needed for the banner; rows without an image
    # binding are skipped.
    images_banner = [
        publication["image"]["value"]
        for publication in results["results"]["bindings"]
        if "image" in publication
    ]
    # Keep at most the last 12 images (the query itself is already LIMIT 12).
    images_banner = images_banner[-12:]
    return render_template('home.html', results=results, ImagesBanner=images_banner)


@app.route("/browsethearchive")
def browsethearchive():
    """Render the full archive listing.

    Queries every work (image and date are optional), ordered by label, and
    passes the raw SPARQL JSON result to ``browsethearchive.html``.
    """
    sparql.setQuery('''
    SELECT ?work ?workLabel ?image ?date WHERE {
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    ?work wdt:P1 wd:Q1.
    OPTIONAL { ?work p:P30 ?statement.
              ?statement ps:P30 ?image;
                         pq:P54 wd:Q90.}
    OPTIONAL { ?work wdt:P13 ?date. }
    FILTER(?work != wd:Q57)
    }
    ORDER BY (?workLabel)
    ''')
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # The template iterates over the bindings itself, so no per-row
    # processing is needed here.
    return render_template('browsethearchive.html', results=results)

@app.route("/browsebycategory")
def browsebycategory():
    """Render the static "browse by category" page."""
    page = render_template('browsebycategory.html')
    return page


##########################
# CATEGORIES TO BE BROWSED
#########################


######################### ARTIST INDEX
@app.route("/artistsindex")
def artistsindex():
    """Render the artists index.

    Lists the values of wdt:P9 on works, ordered by how many distinct works
    reference them (descending) and then by label; wd:Q82 is excluded.
    """
    creators_query = '''
        SELECT ?creators ?creatorsLabel ?creatorsAltLabel ?creatorsDescription 
        WHERE {
        {
        SELECT ?creators (COUNT(DISTINCT ?a) AS ?count) WHERE {
        ?a ?prop ?creators . 
        ?a wdt:P1 ?work .
        BIND (wdt:P9 AS ?prop) .
        BIND (wd:Q1 AS ?work) .
        } GROUP BY ?creators
        } . 
        SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
        }
        FILTER (?creators !=wd:Q82)
        }
        ORDER BY DESC(?count) ?creatorsLabel
        '''
    sparql.setQuery(creators_query)
    sparql.setReturnFormat(JSON)
    creators = sparql.query().convert()
    # The raw result is echoed to stdout before rendering.
    print(creators)
    return render_template('artistsindex.html', results=creators)

######################### PUBLISHERS INDEX
@app.route("/publishersindex")
def publishersindex():
    """Render the publishers index.

    Lists the values of wdt:P10 on works, ordered by how many distinct works
    reference them (descending) and then by label; wd:Q83 and wd:Q71 are
    excluded.
    """
    publishers_query = '''
        SELECT ?publishers ?publishersLabel ?publishersAltLabel ?publishersDescription WHERE {
  		{
    	SELECT ?publishers (COUNT(DISTINCT ?a) AS ?count) WHERE {
       	?a ?prop ?publishers . 
       	?a wdt:P1 ?work .
       	BIND (wdt:P10 AS ?prop) .
       	BIND (wd:Q1 AS ?work) .
    	} GROUP BY ?publishers
 		} . 
  		SERVICE wikibase:label {
    	bd:serviceParam wikibase:language "en" .
 	 	}
  		FILTER(?publishers != wd:Q83)
  		FILTER(?publishers != wd:Q71)
		}
        ORDER BY DESC(?count) ?publishersLabel
        '''
    sparql.setQuery(publishers_query)
    sparql.setReturnFormat(JSON)
    return render_template('publishersindex.html', results=sparql.query().convert())

######################### SELF PUBLISHED INDEX
@app.route("/selfpublishedindex")
def selfpublishedindex():
    """Render the self-published index.

    Selects works whose wdt:P10 value is wd:Q71 (image and date optional)
    and passes the raw SPARQL JSON result to ``selfpublishedindex.html``.
    """
    self_published_query = '''
    SELECT ?work ?workLabel ?image ?date
    WHERE {
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    ?work wdt:P1 wd:Q1;
    wdt:P10 wd:Q71.
    OPTIONAL {?work p:P30 ?statement.
              ?statement ps:P30 ?image;
                         pq:P54 wd:Q90.}
    OPTIONAL { ?work wdt:P13 ?date. } 
    FILTER(?work != wd:Q57)
    }
        '''
    sparql.setQuery(self_published_query)
    sparql.setReturnFormat(JSON)
    return render_template('selfpublishedindex.html', results=sparql.query().convert())

######################### ZINES INDEX
@app.route("/zinesindex")
def zinesindex():
    """Render the zines index from a SPARQL query over works.

    The raw SPARQL JSON result is printed to stdout and passed to
    ``zinesindex.html``.
    """
    # NOTE(review): this query is identical to the one in selfpublishedindex()
    # (it filters on wdt:P10 wd:Q71) - confirm this is the intended selection
    # for zines.
    zines_query = '''
    SELECT ?work ?workLabel ?image ?date
    WHERE {
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    ?work wdt:P1 wd:Q1;
    wdt:P10 wd:Q71.
    OPTIONAL {?work p:P30 ?statement.
              ?statement ps:P30 ?image;
                         pq:P54 wd:Q90.}
    OPTIONAL { ?work wdt:P13 ?date. } 
    FILTER(?work != wd:Q57)
    }
        '''
    sparql.setQuery(zines_query)
    sparql.setReturnFormat(JSON)
    zines = sparql.query().convert()
    print(zines)
    return render_template('zinesindex.html', results=zines)


##########################
# DETAILED INDIVIDUAL PAGES
#########################

######################### ARTWORK
# Item ids accepted by /artwork: "Q" followed by digits. The id is spliced
# into SPARQL queries and a wiki URL, so anything else is rejected up front.
_ARTWORK_ID_RE = re.compile(r'Q[0-9]+')
# Seconds to wait for a wiki page before giving up on it.
_ARTWORK_WIKI_TIMEOUT = 30
# Markup shown when no description text can be obtained.
_ARTWORK_NO_INFO = "<p>Information not available</p>"


def _artwork_query(query):
    """Run *query* on the shared ``sparql`` wrapper and return the JSON result."""
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def _artwork_wiki_text(url):
    """Fetch *url* and return its ``mw-parser-output`` div as Markup.

    Returns None when the request fails or the page has no such div, so the
    caller can decide on a fallback instead of rendering the text "None".
    """
    try:
        html_content = requests.get(url, timeout=_ARTWORK_WIKI_TIMEOUT).text
    except requests.RequestException:
        return None
    soup = BeautifulSoup(html_content, "lxml")
    content = soup.find("div", {"class": "mw-parser-output"})
    if content is None:
        return None
    return Markup(content)


@app.route("/artwork", methods=['GET'])
def artwork():
    """Render the detail page of one artwork.

    Reads the item id from the ``id`` query parameter, runs one SPARQL query
    per page section, fetches the description text from the wiki and passes
    everything to ``artwork.html``.

    Returns:
        The rendered page, or a plain-text 400 response when ``id`` is
        missing or is not of the form ``Q<digits>``.
    """
    artwork_id = request.args.get('id')
    if artwork_id is None or _ARTWORK_ID_RE.fullmatch(artwork_id) is None:
        return "Invalid or missing artwork id", 400

    # Artwork intro / top of the page
    artworkintro = _artwork_query('''
    SELECT ?work ?workLabel ?workDescription ?itemtypeLabel
    {
    VALUES ?work {wd:''' + artwork_id + '''}
    ?work wdt:P1 ?itemtype.
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
    }
    ''')

    # Image(s). Temporary fix: the FILTER on ?depicts is the only difference
    # from the query originally planned for this section.
    artworkimages = _artwork_query('''
    SELECT ?image ?depictsLabel ?licenseLabel   
    WHERE
    {
    VALUES ?work {wd:''' + artwork_id + '''}
    ?work wdt:P30 ?image.
    OPTIONAL { ?work p:P30 ?statement2.
    ?statement2 ps:P30 ?image;
    pq:P54 ?depicts;
    pq:P56 ?license.}
    FILTER(?depicts != wd:Q1897)
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en".} 
    }''')
    print(artworkimages)

    # Right top: contributors
    artworkcontributors = _artwork_query('''
    SELECT DISTINCT ?creators ?creatorsLabel (group_concat(?creatorRolesLabel; separator="; ") as ?role)
    WHERE
    {
    VALUES ?work {wd:''' + artwork_id + '''}
    ?work wdt:P9 ?creators.
    OPTIONAL { ?work p:P9 ?statement1.
    ?statement1 ps:P9 ?creators;
    pq:P49 ?creatorRoles. }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en".
    ?creators rdfs:label ?creatorsLabel.
    ?creatorRoles rdfs:label ?creatorRolesLabel. } 
    }
    GROUP BY ?creators ?creatorsLabel
    ORDER BY ?creatorsLabel
    ''')

    # Right top: date
    artworkdate = _artwork_query('''
    SELECT ?date ?sourceLabel
    WHERE
    {
    VALUES ?work {wd:''' + artwork_id + '''}
    ?work wdt:P13 ?date.
    OPTIONAL { ?work p:P13 ?statement1.
    ?statement1 ps:P13 ?date;
    pq:P50 ?source. }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en".} 
    }
    ''')

    # Right top: publishers. The qualifier is bound to ?publishersRoles so
    # that it matches the variable the label service and group_concat use;
    # it used to be bound to ?creatorRoles, which left ?role always empty.
    artworkpublisher = _artwork_query('''
    SELECT DISTINCT ?publishers ?publishersLabel (group_concat(?publishersRolesLabel; separator="; ") as ?role)
    WHERE
    {
    VALUES ?work {wd:''' + artwork_id + '''}
    ?work wdt:P10 ?publishers.
    OPTIONAL { ?work p:P10 ?statement1.
    ?statement1 ps:P10 ?publishers;
    pq:P49 ?publishersRoles. }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en".
    ?publishers rdfs:label ?publishersLabel.
    ?publishersRoles rdfs:label ?publishersRolesLabel. } 
    }
    GROUP BY ?publishers ?publishersLabel
    ORDER BY ?publishersLabel
    ''')

    # Right middle: description metadata
    artworkdescriptiondata = _artwork_query('''
    SELECT ?accessURLdescriptionPage ?authordescriptionPage ?authordescriptionPageLabel ?datedescriptionPage ?sourcedescriptionPage ?sourcedescriptionPageLabel
    WHERE
    {
    VALUES ?work {wd:''' + artwork_id + '''}
    ?work wdt:P65 ?descriptionPage.
    OPTIONAL { ?descriptionPage wdt:P4 ?accessURLdescriptionPage. }
    OPTIONAL { ?descriptionPage wdt:P9 ?authordescriptionPage. }  
    OPTIONAL { ?descriptionPage wdt:P13 ?datedescriptionPage. } 
    OPTIONAL { ?descriptionPage wdt:P50 ?sourcedescriptionPage. }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
    }
    ''')

    # Description text: stays None when the query returned no rows; with
    # several rows the last one decides.
    artworkdescriptiontext = None
    for binding in artworkdescriptiondata['results']['bindings']:
        if "accessURLdescriptionPage" in binding:
            print("url for description present")
            access_url = binding["accessURLdescriptionPage"]["value"]
            print(access_url)
            # The access URL is expected to contain ":Q<id>"; everything
            # after ":Q" becomes the title suffix of the Description page.
            id_match = re.search(r':Q(.*)', access_url, re.DOTALL)
            description = None
            if id_match is not None:
                description = _artwork_wiki_text(
                    "https://daap.bannerrepeater.org/w/index.php?title=Description:Q"
                    + id_match.group(1) + "&action=render")
            if description is None:
                # URL not understood, fetch failed or page without content.
                description = Markup(_ARTWORK_NO_INFO)
            artworkdescriptiontext = description
        else:
            print("url for description absent")
            artworkdescriptiontext = Markup(_ARTWORK_NO_INFO)

    # Right bottom (work in progress): exhibition history.
    # NOTE(review): the query is still hard-coded to wd:Q57 and neither result
    # below is passed to the template yet - confirm before wiring them in or
    # removing the two requests.
    artworkexhibitiondata = _artwork_query('''
    SELECT ?accessURLexhibitionHisPage ?authorexhibitionHisPageLabel ?dateexhibitionHisPage ?sourceexhibitionHisPage
    WHERE
    {
    VALUES ?work {wd:Q57}
    ?work wdt:P66 ?exhibitionHisPage.
    OPTIONAL { ?exhibitionHisPage wdt:P4 ?accessURLexhibitionHisPage. }
    OPTIONAL { ?exhibitionHisPage wdt:P9 ?authorexhibitionHisPage. }
    OPTIONAL { ?exhibitionHisPage wdt:P13 ?dateexhibitionHisPage. }
    OPTIONAL { ?exhibitionHisPage wdt:P50 ?sourceexhibitionHisPage. }  
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
    }
    ''')
    artworkexhibitiontext = _artwork_wiki_text(
        "https://daap.bannerrepeater.org/w/index.php?title=History:" + artwork_id + "&action=render")

    # Bottom: copies in collections
    copiesincollection = _artwork_query('''
    SELECT ?copiesCollections ?copiesCollectionsLabel ?collection ?collectionLabel ?image
    WHERE
    {
    VALUES ?work {wd:''' + artwork_id + '''}
    ?work wdt:P43 ?copiesCollections.
    ?copiesCollections wdt:P47 ?collection.
    OPTIONAL { ?collection wdt:P30 ?image. }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
    }  ''')

    # Bottom: related works
    relatedworks = _artwork_query('''
    SELECT ?relatedWorks ?relatedWorksLabel ?image ?daterelatedWorks 
    WHERE {
    {
    SELECT ?relatedWorks ?relatedWorksLabel (SAMPLE(?daterelatedWorks) AS ?daterelatedWorks)
    WHERE
        { VALUES ?work {wd:''' + artwork_id + '''}
        ?work wdt:P44 ?relatedWorks.
        OPTIONAL {?relatedWorks wdt:P13 ?daterelatedWorks.}
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
        }
    GROUP BY ?relatedWorks ?relatedWorksLabel
    ORDER BY ?relatedWorksLabel
    }                     
    OPTIONAL {?relatedWorks wdt:P30 ?image.}
    } ''')

    return render_template(
        'artwork.html',
        artwork_id=artwork_id,
        artworkintro=artworkintro,
        artworkimages=artworkimages,
        artworkcontributors=artworkcontributors,
        artworkdate=artworkdate,
        artworkpublisher=artworkpublisher,
        artworkdescriptiondata=artworkdescriptiondata,
        artworkdescriptiontext=artworkdescriptiontext,
        copiesincollection=copiesincollection,
        relatedworks=relatedworks,
    )


######################### PERSON - FOR NOW THIS IS A MESS
@app.route("/person", methods=['GET'])
def person():
    """Render the detail page of one person.

    Reads the item id from the ``id`` query parameter and runs three SPARQL
    queries: the item's direct claims, the works that reference it through
    wdt:P9 and the works that reference it through wdt:P10.

    Returns:
        The rendered ``person.html`` page, or a plain-text 400 response when
        ``id`` is missing or is not of the form ``Q<digits>``.
    """
    person_id = request.args.get('id')
    # The id is spliced into the queries below, so only plain item ids are
    # let through; this also covers a missing parameter (None).
    if person_id is None or re.fullmatch(r'Q[0-9]+', person_id) is None:
        return "Invalid or missing person id", 400

    sparql.setQuery('''

    SELECT ?item ?itemLabel ?itemDescription ?propLabel ?b ?bLabel
    WHERE
    {
    VALUES ?item {wd:''' + person_id + '''}
    ?item ?a ?b.

    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
    ?prop wikibase:directClaim ?a .
    }
    ''')
    sparql.setReturnFormat(JSON)
    person_details = sparql.query().convert()
    print(person_details)

    sparql2.setQuery('''
        SELECT ?work ?workLabel ?image ?date 
        WHERE {
        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        ?work wdt:P1 wd:Q1.
        ?work wdt:P9 wd:''' + person_id + '''.
        OPTIONAL { ?work wdt:P30 ?image. }
        OPTIONAL { ?work wdt:P13 ?date. }
        FILTER(?work != wd:Q57)
        }
        ORDER BY (?workLabel)
        ''')
    sparql2.setReturnFormat(JSON)
    person_creatorof = sparql2.query().convert()

    sparql3.setQuery('''
        SELECT ?work ?workLabel ?image ?date 
        WHERE {
        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        ?work wdt:P1 wd:Q1.
        ?work wdt:P10 wd:''' + person_id + '''.
        OPTIONAL { ?work wdt:P30 ?image. }
        OPTIONAL { ?work wdt:P13 ?date. }
        FILTER(?work != wd:Q57)
        }
        ORDER BY (?workLabel)
        ''')
    sparql3.setReturnFormat(JSON)
    # Run the wdt:P10 query on sparql3; this used to re-run sparql2, which
    # returned the wdt:P9 results a second time.
    person_publisherof = sparql3.query().convert()

    return render_template(
        "person.html",
        person_id=person_id,
        person_creatorof=person_creatorof,
        person_publisherof=person_publisherof,
        person_details=person_details,
    )


######################### ORGANISATION
@app.route("/organisation", methods=['GET'])
def organisation():
    """Render the organisation page.

    The ``id`` query parameter is read but not used yet; the template is
    rendered without any data.
    """
    requested_org_id = request.args.get('id')
    page = render_template("organisation.html")
    return page


#########################
# PAGES FROM WIKI
#########################

######################### SEARCH TOOLS
@app.route("/searchtools")
def searchtools():
    """Render the static search tools page."""
    page = render_template('searchtools.html')
    return page

######################### ABOUT
@app.route("/about")
def about():
    """Render the About page from the wiki's "About" article.

    The article is requested with ``action=render``, parsed with lxml, and
    the ``<html>`` element of the parsed document is handed to ``about.html``
    as trusted markup.
    """
    page_url = "https://daap.bannerrepeater.org/w/index.php?title=About&action=render"
    parsed_page = BeautifulSoup(requests.get(page_url).text, "lxml")
    # TODO: adapt image paths ("wiki/File:" -> "wiki/Special:Redirect/file/")
    # and replace each <img> src with the href of its enclosing <a>.
    page_markup = Markup(parsed_page.find("html"))
    return render_template('about.html', text=page_markup)

######################### TUTORIAL
@app.route("/tutorials")
def tutorials():
    """Render the Tutorials page from the wiki's "Tutorials" article.

    The article is requested with ``action=render``, parsed with lxml, and
    the ``<html>`` element of the parsed document is passed to
    ``tutorials.html`` as ``text``, like the other wiki-backed pages do.
    """
    url = "https://daap.bannerrepeater.org/w/index.php?title=Tutorials&action=render"
    # Make a GET request to fetch the raw HTML content
    html_content = requests.get(url).text
    # Parse the html content
    soup = BeautifulSoup(html_content, "lxml")

    text = Markup(soup.find("html"))
    # The fetched content used to be dropped here: the template was rendered
    # without ``text``.
    return render_template('tutorials.html', text=text)


######################### UPLOAD
@app.route("/upload")
def upload():
    """Render the Upload page from the wiki's "Upload" article.

    The article is requested with ``action=render``, parsed with lxml, and
    the ``<html>`` element of the parsed document is handed to
    ``upload.html`` as trusted markup.
    """
    page_url = "https://daap.bannerrepeater.org/w/index.php?title=Upload&action=render"
    parsed_page = BeautifulSoup(requests.get(page_url).text, "lxml")
    page_markup = Markup(parsed_page.find("html"))
    return render_template('upload.html', text=page_markup)


# #################### CASE STUDY
@app.route("/casestudy")
def casestudy():
    """Render the case study page from the wiki's "Carolee_Schneemann_case_study" article.

    The article is requested with ``action=render``, parsed with lxml, and
    the ``<html>`` element of the parsed document is handed to
    ``casestudy.html`` as trusted markup.
    """
    page_url = "https://daap.bannerrepeater.org/w/index.php?title=Carolee_Schneemann_case_study&action=render"
    parsed_page = BeautifulSoup(requests.get(page_url).text, "lxml")
    page_markup = Markup(parsed_page.find("html"))
    return render_template('casestudy.html', text=page_markup)


# #################### CODE OF CONDUCT
@app.route("/codeofconduct")
def codeofconduct():
    """Render the code of conduct page from the wiki's "Code_of_Conduct" article.

    The article is requested with ``action=render``, parsed with lxml, and
    the ``<html>`` element of the parsed document is handed to
    ``codeofconduct.html`` as trusted markup.
    """
    page_url = "https://daap.bannerrepeater.org/w/index.php?title=Code_of_Conduct&action=render"
    parsed_page = BeautifulSoup(requests.get(page_url).text, "lxml")
    page_markup = Markup(parsed_page.find("html"))
    return render_template('codeofconduct.html', text=page_markup)


######################### LOGIN
#Goes to wikibase page

# ###################
# TEST 

# NOTE(review): these statements run at import time, so importing this module
# performs an HTTP request to the wiki API, and any failure below prevents the
# application from starting - confirm this test code is still wanted here.
# Ask the wiki API for the parsed content of the page 'Test', as JSON.
response = requests.get(
    'https://daap.bannerrepeater.org/w/api.php',
    params={
        'action': 'parse',
        'page': 'Test',
        'format': 'json',
    }).json()
# The rendered HTML is read from parse -> text -> '*' of the JSON reply;
# a reply without these keys raises KeyError.
raw_html = response['parse']['text']['*']
document = html.document_fromstring(raw_html)
# First <p> element of the page; raises IndexError when there is none.
first_p = document.xpath('//p')[0]
# Text content of that first paragraph.
intro_text = first_p.text_content()
# print(intro_text)


#  ALL NAME
#  https://daap.bannerrepeater.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces|namespacealiases

# replace or insert tags
# https://stackoverflow.com/questions/2073541/search-and-replace-in-html-with-beautifulsoup