|
@ -78,8 +78,8 @@ def browsethearchive(): |
|
|
pq:P54 wd:Q90.} |
|
|
pq:P54 wd:Q90.} |
|
|
OPTIONAL { ?work wdt:P13 ?date. } |
|
|
OPTIONAL { ?work wdt:P13 ?date. } |
|
|
FILTER(?work != wd:Q57) |
|
|
FILTER(?work != wd:Q57) |
|
|
} |
|
|
} |
|
|
ORDER BY (?workLabel) |
|
|
ORDER BY (?workLabel) |
|
|
''') |
|
|
''') |
|
|
sparql.setReturnFormat(JSON) |
|
|
sparql.setReturnFormat(JSON) |
|
|
results = sparql.query().convert() |
|
|
results = sparql.query().convert() |
|
@ -295,7 +295,7 @@ def artwork(): |
|
|
''') |
|
|
''') |
|
|
sparql.setReturnFormat(JSON) |
|
|
sparql.setReturnFormat(JSON) |
|
|
artworkpublisher = sparql.query().convert() |
|
|
artworkpublisher = sparql.query().convert() |
|
|
print(artworkpublisher) |
|
|
# print(artworkpublisher) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####right middle |
|
|
#####right middle |
|
@ -316,6 +316,7 @@ def artwork(): |
|
|
sparql.setReturnFormat(JSON) |
|
|
sparql.setReturnFormat(JSON) |
|
|
artworkdescriptiondata = sparql.query().convert() |
|
|
artworkdescriptiondata = sparql.query().convert() |
|
|
# print("hello") |
|
|
# print("hello") |
|
|
|
|
|
# print(artwork_id) |
|
|
# print(artworkdescriptiondata) |
|
|
# print(artworkdescriptiondata) |
|
|
# print("bye") |
|
|
# print("bye") |
|
|
|
|
|
|
|
@ -323,38 +324,44 @@ def artwork(): |
|
|
# print(type(dictionary)) |
|
|
# print(type(dictionary)) |
|
|
|
|
|
|
|
|
# get the description text or say there isn't any |
|
|
# get the description text or say there isn't any |
|
|
# artworkdescriptiontext = None |
|
|
artworkdescriptiontext = None |
|
|
|
|
|
|
|
|
# for x in dictionary['results']['bindings']: |
|
|
for x in dictionary['results']['bindings']: |
|
|
# if "accessURLdescriptionPage" in x: |
|
|
if "accessURLdescriptionPage" in x: |
|
|
# print("url for description present") |
|
|
print("url for description present") |
|
|
|
|
|
print(x["accessURLdescriptionPage"]["value"]) |
|
|
|
|
|
accessURLdescriptionUrl = x["accessURLdescriptionPage"]["value"] |
|
|
|
|
|
desc_url = re.search(r':Q(.*)', accessURLdescriptionUrl, re.DOTALL) |
|
|
|
|
|
# print(desc_url.group(1)) |
|
|
|
|
|
desc_id=desc_url.group(1) |
|
|
# # get the description content from wiki |
|
|
# # get the description content from wiki |
|
|
# artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:"+artwork_id+"&action=render" |
|
|
artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q"+desc_id+"&action=render" |
|
|
# # Make a GET request to fetch the raw HTML content |
|
|
# # Make a GET request to fetch the raw HTML content |
|
|
# html_content = requests.get(artworkdescriptioncontenturl).text |
|
|
html_content = requests.get(artworkdescriptioncontenturl).text |
|
|
# # Parse the html content |
|
|
# # Parse the html content |
|
|
# soup = BeautifulSoup(html_content, "lxml") |
|
|
soup = BeautifulSoup(html_content, "lxml") |
|
|
# # print(soup.prettify()) # print the parsed data of html |
|
|
# # print(soup.prettify()) # print the parsed data of html |
|
|
# # text=soup.find("div" , {"class" : "mw-parser-output"}) |
|
|
text=soup.find("div" , {"class" : "mw-parser-output"}) |
|
|
# text=soup.find_all("p") |
|
|
# text=soup.find_all("p") |
|
|
# artworkdescriptiontext=Markup(text) |
|
|
artworkdescriptiontext=Markup(text) |
|
|
# else: |
|
|
else: |
|
|
# print("url for description absent") |
|
|
print("url for description absent") |
|
|
# text="<p>Information not available</p>" |
|
|
text="<p>Information not available</p>" |
|
|
# artworkdescriptiontext=Markup(text) |
|
|
artworkdescriptiontext=Markup(text) |
|
|
|
|
|
|
|
|
artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render" |
|
|
#description Q427 for testing purposes |
|
|
|
|
|
# artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render" |
|
|
# Make a GET request to fetch the raw HTML content |
|
|
# Make a GET request to fetch the raw HTML content |
|
|
html_content = requests.get(artworkdescriptioncontenturl).text |
|
|
# html_content = requests.get(artworkdescriptioncontenturl).text |
|
|
# Parse the html content |
|
|
# Parse the html content |
|
|
soup = BeautifulSoup(html_content, "lxml") |
|
|
# soup = BeautifulSoup(html_content, "lxml") |
|
|
# print(soup.prettify()) # print the parsed data of html |
|
|
# print(soup.prettify()) # print the parsed data of html |
|
|
text=soup.find("div" , {"class" : "mw-parser-output"}) |
|
|
# text=soup.find("div" , {"class" : "mw-parser-output"}) |
|
|
# text=soup.find_all("p") |
|
|
# text=soup.find_all("p") |
|
|
artworkdescriptiontext=Markup(text) |
|
|
# artworkdescriptiontext=Markup(text) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print(artworkdescriptiontext) |
|
|
# print(artworkdescriptiontext) |
|
|
|
|
|
|
|
|
############ right bottom LATER |
|
|
############ right bottom LATER |
|
|
# exhibitions + id to be changed |
|
|
# exhibitions + id to be changed |
|
|