diff --git a/daapinterface.py b/daapinterface.py index 5714cdb..8b3329e 100644 --- a/daapinterface.py +++ b/daapinterface.py @@ -70,16 +70,16 @@ def home(): @app.route("/browsethearchive") def browsethearchive(): sparql.setQuery(''' - SELECT ?work ?workLabel ?image ?date WHERE { - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } - ?work wdt:P1 wd:Q1. - OPTIONAL { ?work p:P30 ?statement. + SELECT ?work ?workLabel ?image ?date WHERE { + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } + ?work wdt:P1 wd:Q1. + OPTIONAL { ?work p:P30 ?statement. ?statement ps:P30 ?image; pq:P54 wd:Q90.} - OPTIONAL { ?work wdt:P13 ?date. } - FILTER(?work != wd:Q57) -} -ORDER BY (?workLabel) + OPTIONAL { ?work wdt:P13 ?date. } + FILTER(?work != wd:Q57) + } + ORDER BY (?workLabel) ''') sparql.setReturnFormat(JSON) results = sparql.query().convert() @@ -295,7 +295,7 @@ def artwork(): ''') sparql.setReturnFormat(JSON) artworkpublisher = sparql.query().convert() - print(artworkpublisher) + # print(artworkpublisher) #####right middle @@ -316,6 +316,7 @@ def artwork(): sparql.setReturnFormat(JSON) artworkdescriptiondata = sparql.query().convert() # print("hello") + # print(artwork_id) # print(artworkdescriptiondata) # print("bye") @@ -323,38 +324,44 @@ def artwork(): # print(type(dictionary)) # get the description text or say there isn't any - # artworkdescriptiontext = None - - # for x in dictionary['results']['bindings']: - # if "accessURLdescriptionPage" in x: - # print("url for description present") + artworkdescriptiontext = None + + for x in dictionary['results']['bindings']: + if "accessURLdescriptionPage" in x: + print("url for description present") + print(x["accessURLdescriptionPage"]["value"]) + accessURLdescriptionUrl = x["accessURLdescriptionPage"]["value"] + desc_url = re.search(r':Q(.*)', accessURLdescriptionUrl, re.DOTALL) + # print(desc_url.group(1)) + desc_id=desc_url.group(1) # # get the description content from wiki - # artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:"+artwork_id+"&action=render" + artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q"+desc_id+"&action=render" # # Make a GET request to fetch the raw HTML content - # html_content = requests.get(artworkdescriptioncontenturl).text + html_content = requests.get(artworkdescriptioncontenturl).text # # Parse the html content - # soup = BeautifulSoup(html_content, "lxml") + soup = BeautifulSoup(html_content, "lxml") # # print(soup.prettify()) # print the parsed data of html - # # text=soup.find("div" , {"class" : "mw-parser-output"}) + text=soup.find("div" , {"class" : "mw-parser-output"}) # text=soup.find_all("p") - # artworkdescriptiontext=Markup(text) - # else: - # print("url for description absent") - # text="

Information not available

" - # artworkdescriptiontext=Markup(text) + artworkdescriptiontext=Markup(text) + else: + print("url for description absent") + text="

Information not available

" + artworkdescriptiontext=Markup(text) - artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render" + #description Q427 for testing purposes + # artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render" # Make a GET request to fetch the raw HTML content - html_content = requests.get(artworkdescriptioncontenturl).text + # html_content = requests.get(artworkdescriptioncontenturl).text # Parse the html content - soup = BeautifulSoup(html_content, "lxml") + # soup = BeautifulSoup(html_content, "lxml") # print(soup.prettify()) # print the parsed data of html - text=soup.find("div" , {"class" : "mw-parser-output"}) + # text=soup.find("div" , {"class" : "mw-parser-output"}) # text=soup.find_all("p") - artworkdescriptiontext=Markup(text) + # artworkdescriptiontext=Markup(text) - print(artworkdescriptiontext) + # print(artworkdescriptiontext) ############ right bottom LATER # exhibitions + id to be changed