jules
4 years ago
1 changed files with 338 additions and 0 deletions
@ -0,0 +1,338 @@ |
|||
""" |
|||
This file defines a few constants which configure |
|||
which Wikibase instance and which property/item ids |
|||
should be used |
|||
""" |
|||
|
|||
# Endpoint of the MediaWiki API of the Wikibase instance |
|||
mediawiki_api_endpoint = 'https://daap.bannerrepeater.org/w/api.php' |
|||
|
|||
# SPARQL endpoint |
|||
wikibase_sparql_endpoint = 'https://query.daap.bannerrepeater.org/' |
|||
|
|||
# Wikibase namespace ID, used to search for items |
|||
# For Wikidata this is 0, but most Wikibase instances use 120, which is the default Wikibase 'Item:' namespace |
|||
# CHANGE THIS TO 120 if you are adapting this configuration file to another Wikibase |
|||
wikibase_namespace_id = 120 |
|||
|
|||
# Namespace prefix of Wikibase items (including colon, e.g. 'Item:') |
|||
wikibase_namespace_prefix = '' |
|||
|
|||
# User agent to connect to the Wikidata APIs |
|||
user_agent = 'OpenRefine-Daap reconciliation interface' |
|||
|
|||
# Regexes and group ids to extract Qids and Pids from URLs |
|||
import re |
|||
q_re = re.compile(r'(<?https?://daap.bannerrepeater.org/(entity|wiki)/)?(Q[0-9]+)>?') |
|||
q_re_group_id = 3 |
|||
p_re = re.compile(r'(<?https?://daap.bannerrepeater.org/(entity/|wiki/Property:))?(P[0-9]+)>?') |
|||
p_re_group_id = 3 |
|||
|
|||
# Identifier space and schema space exposed to OpenRefine. |
|||
# This should match the IRI prefixes used in RDF serialization. |
|||
# Note that you should be careful about using http or https there, |
|||
# because any variation will break comparisons at various places. |
|||
identifier_space = 'http://daap.bannerrepeater.org/entity/' |
|||
# schema_space = 'http://www.wikidata.org/prop/direct/' |
|||
schema_space = 'http://daap.bannerrepeater.org/wiki/Property:' |
|||
|
|||
# Pattern used to form the URL of a Qid. |
|||
# This is only used for viewing so it is fine to use any protocol (therefore, preferably HTTPS if supported) |
|||
qid_url_pattern = 'https://daap.bannerrepeater.org/wiki/Item:{{id}}' |
|||
|
|||
# By default, filter out any items which are instance |
|||
# of a subclass of this class. |
|||
# For Wikidata, this is "Wikimedia internal stuff". |
|||
# This filters out the disambiguation pages, categories, ... |
|||
# Set to None to disable this filter |
|||
# avoid_items_of_class = 'Q17442446' |
|||
avoid_items_of_class = None |
|||
|
|||
|
|||
# Service name exposed at various places, |
|||
# mainly in the list of reconciliation services of users |
|||
service_name = 'DEV Daap' |
|||
|
|||
# URL (without the trailing slash) where this server runs |
|||
this_host = 'http://116.203.73.138:8000' |
|||
|
|||
# The default limit on the number of results returned by us |
|||
default_num_results = 25 |
|||
|
|||
# The maximum number of search results to retrieve from the Wikidata search API |
|||
wd_api_max_search_results = 50 # need a bot account to get more |
|||
|
|||
# The matching score above which we should automatically match an item |
|||
validation_threshold = 95 |
|||
|
|||
# Redis client used for caching at various places |
|||
import redis |
|||
redis_client = redis.Redis(host='redis', port=6379, db=0, decode_responses=True) |
|||
|
|||
# Redis prefix to use in front of all keys |
|||
redis_key_prefix = 'openrefine_daap:' |
|||
|
|||
# Headers for the HTTP requests made by the tool |
|||
headers = { |
|||
'User-Agent':service_name + ' (OpenRefine-Daap reconciliation service)', |
|||
} |
|||
|
|||
# Previewing settings |
|||
|
|||
# Dimensions of the preview |
|||
zoom_ratio = 1.0 |
|||
preview_height = 100 |
|||
preview_width = 400 |
|||
|
|||
# Width which should be requested from Commons for the thumbnail |
|||
thumbnail_width = 130 |
|||
|
|||
# All properties to use to get an image |
|||
image_properties = [ |
|||
'P18', |
|||
'P14', |
|||
'P15', |
|||
'P158', |
|||
'P181', |
|||
'P242', |
|||
'P1766', |
|||
'P1801', |
|||
'P1846', |
|||
'P2713', |
|||
'P2716', |
|||
'P2910', |
|||
'P3311', |
|||
'P3383', |
|||
'P3451', |
|||
'P1621', |
|||
'P154', |
|||
] |
|||
|
|||
# URL pattern to retrieve an image from its filename |
|||
image_download_pattern = 'https://upload.wikimedia.org/wikipedia/commons/thumb/%s/%s/%s/%dpx-%s' |
|||
|
|||
# Fallback URL of the image to use when previewing an item with no image |
|||
fallback_image_url = this_host + '/static/wikidata.png' |
|||
|
|||
# Alt text of the fallback image |
|||
fallback_image_alt = 'Daap' |
|||
|
|||
# Autodescribe endpoint to use. |
|||
# This is used to generate automatic descriptions from item contents. |
|||
# (disable this with: autodescribe_endpoint = None ) |
|||
autodescribe_endpoint = None |
|||
|
|||
# Property proposal settings |
|||
|
|||
# Default type: entity (Q35120 on Wikidata; set to 'Q1' below for this Wikibase) |
|||
default_type_entity = 'Q1' |
|||
|
|||
# Property path used to obtain the type of an item |
|||
type_property_path = 'P31' |
|||
|
|||
# Property to follow to fetch properties for a given type |
|||
property_for_this_type_property = 'P1963' |
|||
|
|||
# Optional prefix in front of properties in SPARQL-like property paths |
|||
wdt_prefix = 'wdt:' |
|||
|
|||
# Sparql query used to fetch all the subclasses of a given item. |
|||
# The '$qid' string will be replaced by the qid whose children should be fetched. |
|||
sparql_query_to_fetch_subclasses = """ |
|||
SELECT ?child WHERE { ?child wdt:P279* wd:$qid } |
|||
""" |
|||
|
|||
# Sparql query used to fetch all the properties which store unique identifiers |
|||
sparql_query_to_fetch_unique_id_properties = """ |
|||
SELECT ?pid WHERE { ?pid wdt:P31/wdt:P279* wd:Q19847637 } |
|||
""" |
|||
|
|||
# Sparql query used to propose properties to fetch for items of a given class |
|||
sparql_query_to_propose_properties = """ |
|||
SELECT ?prop ?propLabel ?depth WHERE { |
|||
SERVICE gas:service { |
|||
gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" . |
|||
gas:program gas:in wd:$base_type . |
|||
gas:program gas:out ?out . |
|||
gas:program gas:out1 ?depth . |
|||
gas:program gas:maxIterations 10 . |
|||
gas:program gas:maxVisited 100 . |
|||
gas:program gas:linkType wdt:P279 . |
|||
} |
|||
SERVICE wikibase:label { bd:serviceParam wikibase:language "$lang" } |
|||
?out wdt:$property_for_this_type ?prop . |
|||
} |
|||
ORDER BY ?depth |
|||
LIMIT $limit |
|||
""" |
|||
|
|||
root@br-archive:~/openrefine-wikibase# cat config |
|||
config_docker_old.py config_docker.py config.py config_wikidata.py |
|||
root@br-archive:~/openrefine-wikibase# cat config.py |
|||
""" |
|||
This file defines a few constants which configure |
|||
which Wikibase instance and which property/item ids |
|||
should be used |
|||
""" |
|||
|
|||
# Endpoint of the MediaWiki API of the Wikibase instance |
|||
mediawiki_api_endpoint = 'https://daap.bannerrepeater.org/w/api.php' |
|||
|
|||
# SPARQL endpoint |
|||
wikibase_sparql_endpoint = 'https://query.daap.bannerrepeater.org/' |
|||
|
|||
# Wikibase namespace ID, used to search for items |
|||
# For Wikidata this is 0, but most Wikibase instances use 120, which is the default Wikibase 'Item:' namespace |
|||
# CHANGE THIS TO 120 if you are adapting this configuration file to another Wikibase |
|||
wikibase_namespace_id = 120 |
|||
|
|||
# Namespace prefix of Wikibase items (including colon, e.g. 'Item:') |
|||
wikibase_namespace_prefix = '' |
|||
|
|||
# User agent to connect to the Wikidata APIs |
|||
user_agent = 'OpenRefine-Daap reconciliation interface' |
|||
|
|||
# Regexes and group ids to extract Qids and Pids from URLs |
|||
import re |
|||
q_re = re.compile(r'(<?https?://daap.bannerrepeater.org/(entity|wiki)/)?(Q[0-9]+)>?') |
|||
q_re_group_id = 3 |
|||
p_re = re.compile(r'(<?https?://daap.bannerrepeater.org/(entity/|wiki/Property:))?(P[0-9]+)>?') |
|||
p_re_group_id = 3 |
|||
|
|||
# Identifier space and schema space exposed to OpenRefine. |
|||
# This should match the IRI prefixes used in RDF serialization. |
|||
# Note that you should be careful about using http or https there, |
|||
# because any variation will break comparisons at various places. |
|||
identifier_space = 'http://daap.bannerrepeater.org/entity/' |
|||
# schema_space = 'http://www.wikidata.org/prop/direct/' |
|||
schema_space = 'http://daap.bannerrepeater.org/wiki/Property:' |
|||
|
|||
# Pattern used to form the URL of a Qid. |
|||
# This is only used for viewing so it is fine to use any protocol (therefore, preferably HTTPS if supported) |
|||
qid_url_pattern = 'https://daap.bannerrepeater.org/wiki/Item:{{id}}' |
|||
|
|||
# By default, filter out any items which are instance |
|||
# of a subclass of this class. |
|||
# For Wikidata, this is "Wikimedia internal stuff". |
|||
# This filters out the disambiguation pages, categories, ... |
|||
# Set to None to disable this filter |
|||
# avoid_items_of_class = 'Q17442446' |
|||
avoid_items_of_class = None |
|||
|
|||
|
|||
# Service name exposed at various places, |
|||
# mainly in the list of reconciliation services of users |
|||
service_name = 'DEV Daap' |
|||
|
|||
# URL (without the trailing slash) where this server runs |
|||
this_host = 'http://116.203.73.138:8000' |
|||
|
|||
# The default limit on the number of results returned by us |
|||
default_num_results = 25 |
|||
|
|||
# The maximum number of search results to retrieve from the Wikidata search API |
|||
wd_api_max_search_results = 50 # need a bot account to get more |
|||
|
|||
# The matching score above which we should automatically match an item |
|||
validation_threshold = 95 |
|||
|
|||
# Redis client used for caching at various places |
|||
import redis |
|||
redis_client = redis.Redis(host='redis', port=6379, db=0, decode_responses=True) |
|||
|
|||
# Redis prefix to use in front of all keys |
|||
redis_key_prefix = 'openrefine_daap:' |
|||
|
|||
# Headers for the HTTP requests made by the tool |
|||
headers = { |
|||
'User-Agent':service_name + ' (OpenRefine-Daap reconciliation service)', |
|||
} |
|||
|
|||
# Previewing settings |
|||
|
|||
# Dimensions of the preview |
|||
zoom_ratio = 1.0 |
|||
preview_height = 100 |
|||
preview_width = 400 |
|||
|
|||
# Width which should be requested from Commons for the thumbnail |
|||
thumbnail_width = 130 |
|||
|
|||
# All properties to use to get an image |
|||
image_properties = [ |
|||
'P18', |
|||
'P14', |
|||
'P15', |
|||
'P158', |
|||
'P181', |
|||
'P242', |
|||
'P1766', |
|||
'P1801', |
|||
'P1846', |
|||
'P2713', |
|||
'P2716', |
|||
'P2910', |
|||
'P3311', |
|||
'P3383', |
|||
'P3451', |
|||
'P1621', |
|||
'P154', |
|||
] |
|||
|
|||
# URL pattern to retrieve an image from its filename |
|||
image_download_pattern = 'https://upload.wikimedia.org/wikipedia/commons/thumb/%s/%s/%s/%dpx-%s' |
|||
|
|||
# Fallback URL of the image to use when previewing an item with no image |
|||
fallback_image_url = this_host + '/static/wikidata.png' |
|||
|
|||
# Alt text of the fallback image |
|||
fallback_image_alt = 'Daap' |
|||
|
|||
# Autodescribe endpoint to use. |
|||
# This is used to generate automatic descriptions from item contents. |
|||
# (disable this with: autodescribe_endpoint = None ) |
|||
autodescribe_endpoint = None |
|||
|
|||
# Property proposal settings |
|||
|
|||
# Default type: entity (Q35120 on Wikidata; set to 'Q1' below for this Wikibase) |
|||
default_type_entity = 'Q1' |
|||
|
|||
# Property path used to obtain the type of an item |
|||
type_property_path = 'P31' |
|||
|
|||
# Property to follow to fetch properties for a given type |
|||
property_for_this_type_property = 'P1963' |
|||
|
|||
# Optional prefix in front of properties in SPARQL-like property paths |
|||
wdt_prefix = 'wdt:' |
|||
|
|||
# Sparql query used to fetch all the subclasses of a given item. |
|||
# The '$qid' string will be replaced by the qid whose children should be fetched. |
|||
sparql_query_to_fetch_subclasses = """ |
|||
SELECT ?child WHERE { ?child wdt:P279* wd:$qid } |
|||
""" |
|||
|
|||
# Sparql query used to fetch all the properties which store unique identifiers |
|||
sparql_query_to_fetch_unique_id_properties = """ |
|||
SELECT ?pid WHERE { ?pid wdt:P31/wdt:P279* wd:Q19847637 } |
|||
""" |
|||
|
|||
# Sparql query used to propose properties to fetch for items of a given class |
|||
sparql_query_to_propose_properties = """ |
|||
SELECT ?prop ?propLabel ?depth WHERE { |
|||
SERVICE gas:service { |
|||
gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" . |
|||
gas:program gas:in wd:$base_type . |
|||
gas:program gas:out ?out . |
|||
gas:program gas:out1 ?depth . |
|||
gas:program gas:maxIterations 10 . |
|||
gas:program gas:maxVisited 100 . |
|||
gas:program gas:linkType wdt:P279 . |
|||
} |
|||
SERVICE wikibase:label { bd:serviceParam wikibase:language "$lang" } |
|||
?out wdt:$property_for_this_type ?prop . |
|||
} |
|||
ORDER BY ?depth |
|||
LIMIT $limit |
|||
""" |
Loading…
Reference in new issue