description text extracted
This commit is contained in:
parent
328793e6c1
commit
0d67d6e2ad
@ -70,16 +70,16 @@ def home():
|
||||
@app.route("/browsethearchive")
|
||||
def browsethearchive():
|
||||
sparql.setQuery('''
|
||||
SELECT ?work ?workLabel ?image ?date WHERE {
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
|
||||
?work wdt:P1 wd:Q1.
|
||||
OPTIONAL { ?work p:P30 ?statement.
|
||||
SELECT ?work ?workLabel ?image ?date WHERE {
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
|
||||
?work wdt:P1 wd:Q1.
|
||||
OPTIONAL { ?work p:P30 ?statement.
|
||||
?statement ps:P30 ?image;
|
||||
pq:P54 wd:Q90.}
|
||||
OPTIONAL { ?work wdt:P13 ?date. }
|
||||
FILTER(?work != wd:Q57)
|
||||
}
|
||||
ORDER BY (?workLabel)
|
||||
OPTIONAL { ?work wdt:P13 ?date. }
|
||||
FILTER(?work != wd:Q57)
|
||||
}
|
||||
ORDER BY (?workLabel)
|
||||
''')
|
||||
sparql.setReturnFormat(JSON)
|
||||
results = sparql.query().convert()
|
||||
@ -295,7 +295,7 @@ def artwork():
|
||||
''')
|
||||
sparql.setReturnFormat(JSON)
|
||||
artworkpublisher = sparql.query().convert()
|
||||
print(artworkpublisher)
|
||||
# print(artworkpublisher)
|
||||
|
||||
|
||||
#####right middle
|
||||
@ -316,6 +316,7 @@ def artwork():
|
||||
sparql.setReturnFormat(JSON)
|
||||
artworkdescriptiondata = sparql.query().convert()
|
||||
# print("hello")
|
||||
# print(artwork_id)
|
||||
# print(artworkdescriptiondata)
|
||||
# print("bye")
|
||||
|
||||
@ -323,38 +324,44 @@ def artwork():
|
||||
# print(type(dictionary))
|
||||
|
||||
# get the description text or say there isn't any
|
||||
# artworkdescriptiontext = None
|
||||
artworkdescriptiontext = None
|
||||
|
||||
# for x in dictionary['results']['bindings']:
|
||||
# if "accessURLdescriptionPage" in x:
|
||||
# print("url for description present")
|
||||
for x in dictionary['results']['bindings']:
|
||||
if "accessURLdescriptionPage" in x:
|
||||
print("url for description present")
|
||||
print(x["accessURLdescriptionPage"]["value"])
|
||||
accessURLdescriptionUrl = x["accessURLdescriptionPage"]["value"]
|
||||
desc_url = re.search(r':Q(.*)', accessURLdescriptionUrl, re.DOTALL)
|
||||
# print(desc_url.group(1))
|
||||
desc_id=desc_url.group(1)
|
||||
# # get the description content from wiki
|
||||
# artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:"+artwork_id+"&action=render"
|
||||
artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q"+desc_id+"&action=render"
|
||||
# # Make a GET request to fetch the raw HTML content
|
||||
# html_content = requests.get(artworkdescriptioncontenturl).text
|
||||
html_content = requests.get(artworkdescriptioncontenturl).text
|
||||
# # Parse the html content
|
||||
# soup = BeautifulSoup(html_content, "lxml")
|
||||
soup = BeautifulSoup(html_content, "lxml")
|
||||
# # print(soup.prettify()) # print the parsed data of html
|
||||
# # text=soup.find("div" , {"class" : "mw-parser-output"})
|
||||
text=soup.find("div" , {"class" : "mw-parser-output"})
|
||||
# text=soup.find_all("p")
|
||||
# artworkdescriptiontext=Markup(text)
|
||||
# else:
|
||||
# print("url for description absent")
|
||||
# text="<p>Information not available</p>"
|
||||
# artworkdescriptiontext=Markup(text)
|
||||
artworkdescriptiontext=Markup(text)
|
||||
else:
|
||||
print("url for description absent")
|
||||
text="<p>Information not available</p>"
|
||||
artworkdescriptiontext=Markup(text)
|
||||
|
||||
artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render"
|
||||
#description Q427 for testing purposes
|
||||
# artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render"
|
||||
# Make a GET request to fetch the raw HTML content
|
||||
html_content = requests.get(artworkdescriptioncontenturl).text
|
||||
# html_content = requests.get(artworkdescriptioncontenturl).text
|
||||
# Parse the html content
|
||||
soup = BeautifulSoup(html_content, "lxml")
|
||||
# soup = BeautifulSoup(html_content, "lxml")
|
||||
# print(soup.prettify()) # print the parsed data of html
|
||||
text=soup.find("div" , {"class" : "mw-parser-output"})
|
||||
# text=soup.find("div" , {"class" : "mw-parser-output"})
|
||||
# text=soup.find_all("p")
|
||||
artworkdescriptiontext=Markup(text)
|
||||
# artworkdescriptiontext=Markup(text)
|
||||
|
||||
|
||||
print(artworkdescriptiontext)
|
||||
# print(artworkdescriptiontext)
|
||||
|
||||
############ right bottom LATER
|
||||
# exhibitions + id to be changed
|
||||
|
Loading…
Reference in New Issue
Block a user