description text extracted

This commit is contained in:
parent 328793e6c1
commit 0d67d6e2ad
@@ -70,16 +70,16 @@ def home():
 @app.route("/browsethearchive")
 def browsethearchive():
     sparql.setQuery('''
       SELECT ?work ?workLabel ?image ?date WHERE {
         SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
         ?work wdt:P1 wd:Q1.
         OPTIONAL { ?work p:P30 ?statement.
                    ?statement ps:P30 ?image;
                               pq:P54 wd:Q90.}
         OPTIONAL { ?work wdt:P13 ?date. }
         FILTER(?work != wd:Q57)
       }
       ORDER BY (?workLabel)
     ''')
     sparql.setReturnFormat(JSON)
     results = sparql.query().convert()
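For reference, the browsethearchive route above follows the usual SPARQLWrapper call sequence: set the query string, request JSON, then convert the HTTP response into a plain Python dictionary whose rows live under results['results']['bindings']. Below is a minimal standalone sketch of that sequence; the endpoint URL and the trimmed-down query are placeholder assumptions, since the diff never shows where the shared sparql object is created, and wdt:P1 / wd:Q1 are the project's own Wikibase identifiers rather than Wikidata ones.

# Minimal sketch of the SPARQLWrapper pattern used in browsethearchive().
# ENDPOINT is a placeholder assumption; the diff does not show how `sparql` is configured.
from SPARQLWrapper import SPARQLWrapper, JSON

ENDPOINT = "https://daap.bannerrepeater.org/query/sparql"  # assumed endpoint URL
sparql = SPARQLWrapper(ENDPOINT)
sparql.setQuery('''
    SELECT ?work ?workLabel WHERE {
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
      ?work wdt:P1 wd:Q1.
    }
    ORDER BY (?workLabel)
''')
sparql.setReturnFormat(JSON)
results = sparql.query().convert()          # a dict, not raw JSON text
for row in results["results"]["bindings"]:  # one dict per result row
    print(row["workLabel"]["value"])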
@@ -295,7 +295,7 @@ def artwork():
     ''')
     sparql.setReturnFormat(JSON)
     artworkpublisher = sparql.query().convert()
-    print(artworkpublisher)
+    # print(artworkpublisher)


     #####right middle
@@ -316,6 +316,7 @@ def artwork():
     sparql.setReturnFormat(JSON)
     artworkdescriptiondata = sparql.query().convert()
     # print("hello")
+    # print(artwork_id)
     # print(artworkdescriptiondata)
     # print("bye")

@@ -323,38 +324,44 @@ def artwork():
     # print(type(dictionary))

     # get the description text or say there isn't any
-    # artworkdescriptiontext = None
+    artworkdescriptiontext = None

-    # for x in dictionary['results']['bindings']:
-    # if "accessURLdescriptionPage" in x:
-    # print("url for description present")
+    for x in dictionary['results']['bindings']:
+        if "accessURLdescriptionPage" in x:
+            print("url for description present")
+            print(x["accessURLdescriptionPage"]["value"])
+            accessURLdescriptionUrl = x["accessURLdescriptionPage"]["value"]
+            desc_url = re.search(r':Q(.*)', accessURLdescriptionUrl, re.DOTALL)
+            # print(desc_url.group(1))
+            desc_id=desc_url.group(1)
     # # get the description content from wiki
-    # artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:"+artwork_id+"&action=render"
+            artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q"+desc_id+"&action=render"
     # # Make a GET request to fetch the raw HTML content
-    # html_content = requests.get(artworkdescriptioncontenturl).text
+            html_content = requests.get(artworkdescriptioncontenturl).text
     # # Parse the html content
-    # soup = BeautifulSoup(html_content, "lxml")
+            soup = BeautifulSoup(html_content, "lxml")
     # # print(soup.prettify()) # print the parsed data of html
-    # # text=soup.find("div" , {"class" : "mw-parser-output"})
+            text=soup.find("div" , {"class" : "mw-parser-output"})
     # text=soup.find_all("p")
-    # artworkdescriptiontext=Markup(text)
-    # else:
-    # print("url for description absent")
-    # text="<p>Information not available</p>"
-    # artworkdescriptiontext=Markup(text)
+            artworkdescriptiontext=Markup(text)
+        else:
+            print("url for description absent")
+            text="<p>Information not available</p>"
+            artworkdescriptiontext=Markup(text)

-    artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render"
+    #description Q427 for testing purposes
+    # artworkdescriptioncontenturl = "https://daap.bannerrepeater.org/w/index.php?title=Description:Q427&action=render"
     # Make a GET request to fetch the raw HTML content
-    html_content = requests.get(artworkdescriptioncontenturl).text
+    # html_content = requests.get(artworkdescriptioncontenturl).text
     # Parse the html content
-    soup = BeautifulSoup(html_content, "lxml")
+    # soup = BeautifulSoup(html_content, "lxml")
     # print(soup.prettify()) # print the parsed data of html
-    text=soup.find("div" , {"class" : "mw-parser-output"})
+    # text=soup.find("div" , {"class" : "mw-parser-output"})
     # text=soup.find_all("p")
-    artworkdescriptiontext=Markup(text)
+    # artworkdescriptiontext=Markup(text)


-    print(artworkdescriptiontext)
+    # print(artworkdescriptiontext)

     ############ right bottom LATER
     # exhibitions + id to be changed
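Taken together, the un-commented block above is what actually extracts the description text: it pulls the local Q-id out of the accessURLdescriptionPage URL with a regex, builds the wiki's action=render URL for that Description page, fetches the HTML with requests, grabs the mw-parser-output div with BeautifulSoup, and wraps the result in Markup so the template renders it unescaped. Below is a hedged sketch of the same flow pulled out into a hypothetical helper; the function name and the markupsafe import are assumptions (the route keeps this logic inline and may import Markup from flask instead).

import re
import requests
from bs4 import BeautifulSoup
from markupsafe import Markup  # assumption: the app may import Markup from flask instead

def extract_description_text(binding):
    """Hypothetical helper mirroring the inline logic in artwork().

    `binding` is one entry of results["results"]["bindings"] from the SPARQL response.
    """
    if "accessURLdescriptionPage" not in binding:
        # Mirrors the else-branch fallback in the route.
        return Markup("<p>Information not available</p>")
    url = binding["accessURLdescriptionPage"]["value"]
    # Grab everything after ":Q", e.g. ".../Description:Q427" -> "427".
    match = re.search(r':Q(.*)', url, re.DOTALL)
    if match is None:
        return Markup("<p>Information not available</p>")
    desc_id = match.group(1)
    render_url = ("https://daap.bannerrepeater.org/w/index.php"
                  "?title=Description:Q" + desc_id + "&action=render")
    # Fetch the rendered wiki page and keep only the parsed description body.
    html_content = requests.get(render_url).text
    soup = BeautifulSoup(html_content, "lxml")
    text = soup.find("div", {"class": "mw-parser-output"})
    return Markup(text)

One thing the inline version leaves open is the unguarded desc_url.group(1): if the regex ever fails to match, it would raise an AttributeError, which is why the sketch checks for None before using the match.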