config file open refine as on Greenhost server
This commit is contained in:
parent
671110139c
commit
92d1ad30cc
338
config-OR-greenhost.py
Normal file
338
config-OR-greenhost.py
Normal file
@ -0,0 +1,338 @@
|
||||
"""
|
||||
This file defines a few constants which configure
|
||||
which Wikibase instance and which property/item ids
|
||||
should be used
|
||||
"""
|
||||
|
||||
# Endpoint of the MediaWiki API of the Wikibase instance
|
||||
mediawiki_api_endpoint = 'https://daap.bannerrepeater.org/w/api.php'
|
||||
|
||||
# SPARQL endpoint
|
||||
wikibase_sparql_endpoint = 'https://query.daap.bannerrepeater.org/'
|
||||
|
||||
# Wikibase namespace ID, used to search for items
|
||||
# For Wikidata this is 0, but by default a Wikibase uses 120, which is the default Wikibase 'Item:' namespace
|
||||
# CHANGE THIS TO 120 if you are adapting this configuration file to another Wikibase
|
||||
wikibase_namespace_id = 120
|
||||
|
||||
# Namespace prefix of Wikibase items (including colon, e.g. 'Item:')
|
||||
# Items live in namespace 120 here, whose page titles carry the 'Item:' prefix
# (see qid_url_pattern, which builds '/wiki/Item:<id>' URLs). The prefix must
# therefore be non-empty so it can be stripped from page titles to obtain bare ids.
# NOTE(review): confirm against the live wiki before deploying.
wikibase_namespace_prefix = 'Item:'
|
||||
|
||||
# User agent to connect to the Wikidata APIs
|
||||
user_agent = 'OpenRefine-Daap reconciliation interface'
|
||||
|
||||
# Regexes and group ids to extract Qids and Pids from URLs
|
||||
import re
|
||||
# Matches a bare Qid or a Qid embedded in an entity/wiki URL of this Wikibase.
# The dots of the host name are escaped so that they only match a literal '.',
# and an optional (non-capturing) 'Item:' namespace prefix is accepted so that
# URLs of the form produced by qid_url_pattern are recognised too.
# The Qid itself stays in capture group 3.
q_re = re.compile(r'(<?https?://daap\.bannerrepeater\.org/(entity|wiki)/(?:Item:)?)?(Q[0-9]+)>?')
|
||||
q_re_group_id = 3
|
||||
# Matches a bare Pid or a Pid embedded in an entity/Property: URL of this Wikibase.
# The dots of the host name are escaped so that they only match a literal '.'.
# The Pid itself stays in capture group 3.
p_re = re.compile(r'(<?https?://daap\.bannerrepeater\.org/(entity/|wiki/Property:))?(P[0-9]+)>?')
|
||||
p_re_group_id = 3
|
||||
|
||||
# Identifier space and schema space exposed to OpenRefine.
|
||||
# This should match the IRI prefixes used in RDF serialization.
|
||||
# Note that you should be careful about using http or https there,
|
||||
# because any variation will break comparisons at various places.
|
||||
identifier_space = 'http://daap.bannerrepeater.org/entity/'
|
||||
# schema_space = 'http://www.wikidata.org/prop/direct/'
|
||||
schema_space = 'http://daap.bannerrepeater.org/wiki/Property:'
|
||||
|
||||
# Pattern used to form the URL of a Qid.
|
||||
# This is only used for viewing so it is fine to use any protocol (therefore, preferably HTTPS if supported)
|
||||
qid_url_pattern = 'https://daap.bannerrepeater.org/wiki/Item:{{id}}'
|
||||
|
||||
# By default, filter out any items which are instance
|
||||
# of a subclass of this class.
|
||||
# For Wikidata, this is "Wikimedia internal stuff".
|
||||
# This filters out the disambiguation pages, categories, ...
|
||||
# Set to None to disable this filter
|
||||
# avoid_items_of_class = 'Q17442446'
|
||||
avoid_items_of_class = None
|
||||
|
||||
|
||||
# Service name exposed at various places,
|
||||
# mainly in the list of reconciliation services of users
|
||||
service_name = 'DEV Daap'
|
||||
|
||||
# URL (without the trailing slash) where this server runs
|
||||
this_host = 'http://116.203.73.138:8000'
|
||||
|
||||
# The default limit on the number of results returned by us
|
||||
default_num_results = 25
|
||||
|
||||
# The maximum number of search results to retrieve from the Wikidata search API
|
||||
wd_api_max_search_results = 50 # need a bot account to get more
|
||||
|
||||
# The matching score above which we should automatically match an item
|
||||
validation_threshold = 95
|
||||
|
||||
# Redis client used for caching at various places
|
||||
import redis
|
||||
redis_client = redis.Redis(host='redis', port=6379, db=0, decode_responses=True)
|
||||
|
||||
# Redis prefix to use in front of all keys
|
||||
redis_key_prefix = 'openrefine_daap:'
|
||||
|
||||
# Headers for the HTTP requests made by the tool
|
||||
headers = {
|
||||
'User-Agent':service_name + ' (OpenRefine-Daap reconciliation service)',
|
||||
}
|
||||
|
||||
# Previewing settings
|
||||
|
||||
# Dimensions of the preview
|
||||
zoom_ratio = 1.0
|
||||
preview_height = 100
|
||||
preview_width = 400
|
||||
|
||||
# Width that should be requested from Commons for the thumbnail
|
||||
thumbnail_width = 130
|
||||
|
||||
# All properties to use to get an image
|
||||
image_properties = [
|
||||
'P18',
|
||||
'P14',
|
||||
'P15',
|
||||
'P158',
|
||||
'P181',
|
||||
'P242',
|
||||
'P1766',
|
||||
'P1801',
|
||||
'P1846',
|
||||
'P2713',
|
||||
'P2716',
|
||||
'P2910',
|
||||
'P3311',
|
||||
'P3383',
|
||||
'P3451',
|
||||
'P1621',
|
||||
'P154',
|
||||
]
|
||||
|
||||
# URL pattern to retrieve an image from its filename
|
||||
image_download_pattern = 'https://upload.wikimedia.org/wikipedia/commons/thumb/%s/%s/%s/%dpx-%s'
|
||||
|
||||
# Fallback URL of the image to use when previewing an item with no image
|
||||
fallback_image_url = this_host + '/static/wikidata.png'
|
||||
|
||||
# Alt text of the fallback image
|
||||
fallback_image_alt = 'Daap'
|
||||
|
||||
# Autodescribe endpoint to use.
|
||||
# this is used to generate automatic descriptions from item contents.
|
||||
# (disable this with: autodescribe_endpoint = None )
|
||||
autodescribe_endpoint = None
|
||||
|
||||
# Property proposal settings
|
||||
|
||||
# Default type: entity (Q35120 on Wikidata; this configuration uses Q1)
|
||||
default_type_entity = 'Q1'
|
||||
|
||||
# Property path used to obtain the type of an item
|
||||
type_property_path = 'P31'
|
||||
|
||||
# Property to follow to fetch properties for a given type
|
||||
property_for_this_type_property = 'P1963'
|
||||
|
||||
# Optional prefix in front of properties in SPARQL-like property paths
|
||||
wdt_prefix = 'wdt:'
|
||||
|
||||
# Sparql query used to fetch all the subclasses of a given item.
|
||||
# The '$qid' string will be replaced by the qid whose children should be fetched.
|
||||
sparql_query_to_fetch_subclasses = """
|
||||
SELECT ?child WHERE { ?child wdt:P279* wd:$qid }
|
||||
"""
|
||||
|
||||
# Sparql query used to fetch all the properties which store unique identifiers
|
||||
sparql_query_to_fetch_unique_id_properties = """
|
||||
SELECT ?pid WHERE { ?pid wdt:P31/wdt:P279* wd:Q19847637 }
|
||||
"""
|
||||
|
||||
# Sparql query used to propose properties to fetch for items of a given class
|
||||
sparql_query_to_propose_properties = """
|
||||
SELECT ?prop ?propLabel ?depth WHERE {
|
||||
SERVICE gas:service {
|
||||
gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" .
|
||||
gas:program gas:in wd:$base_type .
|
||||
gas:program gas:out ?out .
|
||||
gas:program gas:out1 ?depth .
|
||||
gas:program gas:maxIterations 10 .
|
||||
gas:program gas:maxVisited 100 .
|
||||
gas:program gas:linkType wdt:P279 .
|
||||
}
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "$lang" }
|
||||
?out wdt:$property_for_this_type ?prop .
|
||||
}
|
||||
ORDER BY ?depth
|
||||
LIMIT $limit
|
||||
"""
|
||||
|
||||
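# NOTE: the shell lines below were pasted from a terminal session; they are
# commented out so that this module parses. The settings that follow repeat
# the configuration above.
# root@br-archive:~/openrefine-wikibase# cat config
|
||||
# config_docker_old.py config_docker.py config.py config_wikidata.py
|
||||
# root@br-archive:~/openrefine-wikibase# cat config.py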
|
||||
"""
|
||||
This file defines a few constants which configure
|
||||
which Wikibase instance and which property/item ids
|
||||
should be used
|
||||
"""
|
||||
|
||||
# Endpoint of the MediaWiki API of the Wikibase instance
|
||||
mediawiki_api_endpoint = 'https://daap.bannerrepeater.org/w/api.php'
|
||||
|
||||
# SPARQL endpoint
|
||||
wikibase_sparql_endpoint = 'https://query.daap.bannerrepeater.org/'
|
||||
|
||||
# Wikibase namespace ID, used to search for items
|
||||
# For Wikidata this is 0, but by default a Wikibase uses 120, which is the default Wikibase 'Item:' namespace
|
||||
# CHANGE THIS TO 120 if you are adapting this configuration file to another Wikibase
|
||||
wikibase_namespace_id = 120
|
||||
|
||||
# Namespace prefix of Wikibase items (including colon, e.g. 'Item:')
|
||||
# Items live in namespace 120 here, whose page titles carry the 'Item:' prefix
# (see qid_url_pattern, which builds '/wiki/Item:<id>' URLs). The prefix must
# therefore be non-empty so it can be stripped from page titles to obtain bare ids.
# NOTE(review): confirm against the live wiki before deploying.
wikibase_namespace_prefix = 'Item:'
|
||||
|
||||
# User agent to connect to the Wikidata APIs
|
||||
user_agent = 'OpenRefine-Daap reconciliation interface'
|
||||
|
||||
# Regexes and group ids to extract Qids and Pids from URLs
|
||||
import re
|
||||
# Matches a bare Qid or a Qid embedded in an entity/wiki URL of this Wikibase.
# The dots of the host name are escaped so that they only match a literal '.',
# and an optional (non-capturing) 'Item:' namespace prefix is accepted so that
# URLs of the form produced by qid_url_pattern are recognised too.
# The Qid itself stays in capture group 3.
q_re = re.compile(r'(<?https?://daap\.bannerrepeater\.org/(entity|wiki)/(?:Item:)?)?(Q[0-9]+)>?')
|
||||
q_re_group_id = 3
|
||||
# Matches a bare Pid or a Pid embedded in an entity/Property: URL of this Wikibase.
# The dots of the host name are escaped so that they only match a literal '.'.
# The Pid itself stays in capture group 3.
p_re = re.compile(r'(<?https?://daap\.bannerrepeater\.org/(entity/|wiki/Property:))?(P[0-9]+)>?')
|
||||
p_re_group_id = 3
|
||||
|
||||
# Identifier space and schema space exposed to OpenRefine.
|
||||
# This should match the IRI prefixes used in RDF serialization.
|
||||
# Note that you should be careful about using http or https there,
|
||||
# because any variation will break comparisons at various places.
|
||||
identifier_space = 'http://daap.bannerrepeater.org/entity/'
|
||||
# schema_space = 'http://www.wikidata.org/prop/direct/'
|
||||
schema_space = 'http://daap.bannerrepeater.org/wiki/Property:'
|
||||
|
||||
# Pattern used to form the URL of a Qid.
|
||||
# This is only used for viewing so it is fine to use any protocol (therefore, preferably HTTPS if supported)
|
||||
qid_url_pattern = 'https://daap.bannerrepeater.org/wiki/Item:{{id}}'
|
||||
|
||||
# By default, filter out any items which are instance
|
||||
# of a subclass of this class.
|
||||
# For Wikidata, this is "Wikimedia internal stuff".
|
||||
# This filters out the disambiguation pages, categories, ...
|
||||
# Set to None to disable this filter
|
||||
# avoid_items_of_class = 'Q17442446'
|
||||
avoid_items_of_class = None
|
||||
|
||||
|
||||
# Service name exposed at various places,
|
||||
# mainly in the list of reconciliation services of users
|
||||
service_name = 'DEV Daap'
|
||||
|
||||
# URL (without the trailing slash) where this server runs
|
||||
this_host = 'http://116.203.73.138:8000'
|
||||
|
||||
# The default limit on the number of results returned by us
|
||||
default_num_results = 25
|
||||
|
||||
# The maximum number of search results to retrieve from the Wikidata search API
|
||||
wd_api_max_search_results = 50 # need a bot account to get more
|
||||
|
||||
# The matching score above which we should automatically match an item
|
||||
validation_threshold = 95
|
||||
|
||||
# Redis client used for caching at various places
|
||||
import redis
|
||||
redis_client = redis.Redis(host='redis', port=6379, db=0, decode_responses=True)
|
||||
|
||||
# Redis prefix to use in front of all keys
|
||||
redis_key_prefix = 'openrefine_daap:'
|
||||
|
||||
# Headers for the HTTP requests made by the tool
|
||||
headers = {
|
||||
'User-Agent':service_name + ' (OpenRefine-Daap reconciliation service)',
|
||||
}
|
||||
|
||||
# Previewing settings
|
||||
|
||||
# Dimensions of the preview
|
||||
zoom_ratio = 1.0
|
||||
preview_height = 100
|
||||
preview_width = 400
|
||||
|
||||
# Width that should be requested from Commons for the thumbnail
|
||||
thumbnail_width = 130
|
||||
|
||||
# All properties to use to get an image
|
||||
image_properties = [
|
||||
'P18',
|
||||
'P14',
|
||||
'P15',
|
||||
'P158',
|
||||
'P181',
|
||||
'P242',
|
||||
'P1766',
|
||||
'P1801',
|
||||
'P1846',
|
||||
'P2713',
|
||||
'P2716',
|
||||
'P2910',
|
||||
'P3311',
|
||||
'P3383',
|
||||
'P3451',
|
||||
'P1621',
|
||||
'P154',
|
||||
]
|
||||
|
||||
# URL pattern to retrieve an image from its filename
|
||||
image_download_pattern = 'https://upload.wikimedia.org/wikipedia/commons/thumb/%s/%s/%s/%dpx-%s'
|
||||
|
||||
# Fallback URL of the image to use when previewing an item with no image
|
||||
fallback_image_url = this_host + '/static/wikidata.png'
|
||||
|
||||
# Alt text of the fallback image
|
||||
fallback_image_alt = 'Daap'
|
||||
|
||||
# Autodescribe endpoint to use.
|
||||
# this is used to generate automatic descriptions from item contents.
|
||||
# (disable this with: autodescribe_endpoint = None )
|
||||
autodescribe_endpoint = None
|
||||
|
||||
# Property proposal settings
|
||||
|
||||
# Default type: entity (Q35120 on Wikidata; this configuration uses Q1)
|
||||
default_type_entity = 'Q1'
|
||||
|
||||
# Property path used to obtain the type of an item
|
||||
type_property_path = 'P31'
|
||||
|
||||
# Property to follow to fetch properties for a given type
|
||||
property_for_this_type_property = 'P1963'
|
||||
|
||||
# Optional prefix in front of properties in SPARQL-like property paths
|
||||
wdt_prefix = 'wdt:'
|
||||
|
||||
# Sparql query used to fetch all the subclasses of a given item.
|
||||
# The '$qid' string will be replaced by the qid whose children should be fetched.
|
||||
sparql_query_to_fetch_subclasses = """
|
||||
SELECT ?child WHERE { ?child wdt:P279* wd:$qid }
|
||||
"""
|
||||
|
||||
# Sparql query used to fetch all the properties which store unique identifiers
|
||||
sparql_query_to_fetch_unique_id_properties = """
|
||||
SELECT ?pid WHERE { ?pid wdt:P31/wdt:P279* wd:Q19847637 }
|
||||
"""
|
||||
|
||||
# Sparql query used to propose properties to fetch for items of a given class
|
||||
sparql_query_to_propose_properties = """
|
||||
SELECT ?prop ?propLabel ?depth WHERE {
|
||||
SERVICE gas:service {
|
||||
gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" .
|
||||
gas:program gas:in wd:$base_type .
|
||||
gas:program gas:out ?out .
|
||||
gas:program gas:out1 ?depth .
|
||||
gas:program gas:maxIterations 10 .
|
||||
gas:program gas:maxVisited 100 .
|
||||
gas:program gas:linkType wdt:P279 .
|
||||
}
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "$lang" }
|
||||
?out wdt:$property_for_this_type ?prop .
|
||||
}
|
||||
ORDER BY ?depth
|
||||
LIMIT $limit
|
||||
"""
|
Loading…
Reference in New Issue
Block a user