""" This file defines a few constants which configure which Wikibase instance and which property/item ids should be used """ # Endpoint of the MediaWiki API of the Wikibase instance mediawiki_api_endpoint = 'https://daap.bannerrepeater.org/w/api.php' # SPARQL endpoint wikibase_sparql_endpoint = 'https://query.daap.bannerrepeater.org/' # Wikibase namespace ID, used to search for items # For Wikidata this is 0, but most by default Wikibase uses 120, which is the default Wikibase 'Item:' namespace # CHANGE THIS TO 120 if you are adapting this configuration file to another Wikibase wikibase_namespace_id = 120 # Namespace prefix of Wikibase items (including colon, e.g. 'Item:') wikibase_namespace_prefix = '' # User agent to connect to the Wikidata APIs user_agent = 'OpenRefine-Daap reconciliation interface' # Regexes and group ids to extracts Qids and Pids from URLs import re q_re = re.compile(r'(?') q_re_group_id = 3 p_re = re.compile(r'(?') p_re_group_id = 3 # Identifier space and schema space exposed to OpenRefine. # This should match the IRI prefixes used in RDF serialization. # Note that you should be careful about using http or https there, # because any variation will break comparisons at various places. identifier_space = 'http://daap.bannerrepeater.org/entity/' #"schema_space = 'http://www.wikidata.org/prop/direct/' schema_space = 'http://daap.bannerrepeater.org/wiki/Property:' # Pattern used to form the URL of a Qid. # This is only used for viewing so it is fine to use any protocol (therefore, preferably HTTPS if supported) qid_url_pattern = 'https://daap.bannerrepeater.org/wiki/Item:{{id}}' # By default, filter out any items which are instance # of a subclass of this class. # For Wikidata, this is "Wikimedia internal stuff". # This filters out the disambiguation pages, categories, ... # Set to None to disable this filter # avoid_items_of_class = 'Q17442446' avoid_items_of_class = None # Service name exposed at various places, # mainly in the list of reconciliation services of users service_name = 'DEV Daap' # URL (without the trailing slash) where this server runs this_host = 'http://116.203.73.138:8000' # The default limit on the number of results returned by us default_num_results = 25 # The maximum number of search results to retrieve from the Wikidata search API wd_api_max_search_results = 50 # need a bot account to get more # The matching score above which we should automatically match an item validation_threshold = 95 # Redis client used for caching at various places import redis redis_client = redis.Redis(host='redis', port=6379, db=0, decode_responses=True) # Redis prefix to use in front of all keys redis_key_prefix = 'openrefine_daap:' # Headers for the HTTP requests made by the tool headers = { 'User-Agent':service_name + ' (OpenRefine-Daap reconciliation service)', } # Previewing settings # Dimensions of the preview zoom_ratio = 1.0 preview_height = 100 preview_width = 400 # With which should be requested from Commons for the thumbnail thumbnail_width = 130 # All properties to use to get an image image_properties = [ 'P18', 'P14', 'P15', 'P158', 'P181', 'P242', 'P1766', 'P1801', 'P1846', 'P2713', 'P2716', 'P2910', 'P3311', 'P3383', 'P3451', 'P1621', 'P154', ] # URL pattern to retrieve an image from its filename image_download_pattern = 'https://upload.wikimedia.org/wikipedia/commons/thumb/%s/%s/%s/%dpx-%s' # Fallback URL of the image to use when previewing an item with no image fallback_image_url = this_host + '/static/wikidata.png' # Alt text of the fallback image fallback_image_alt = 'Daap' # Autodescribe endpoint to use. # this is used to generate automatic descriptions from item contents. # (disable this with: autodescribe_endpoint = None ) autodescribe_endpoint = None # Property proposal settings # Default type : entity (Q35120) default_type_entity = 'Q1' # Property path used to obtain the type of an item type_property_path = 'P31' # Property to follow to fetch properties for a given type property_for_this_type_property = 'P1963' # Optional prefix in front of properties in SPARQL-like property paths wdt_prefix = 'wdt:' # Sparql query used to fetch all the subclasses of a given item. # The '$qid' string will be replaced by the qid whose children should be fetched. sparql_query_to_fetch_subclasses = """ SELECT ?child WHERE { ?child wdt:P279* wd:$qid } """ # Sparql query used to fetch all the properties which store unique identifiers sparql_query_to_fetch_unique_id_properties = """ SELECT ?pid WHERE { ?pid wdt:P31/wdt:P279* wd:Q19847637 } """ # Sparql query used to propose properties to fetch for items of a given class sparql_query_to_propose_properties = """ SELECT ?prop ?propLabel ?depth WHERE { SERVICE gas:service { gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" . gas:program gas:in wd:$base_type . gas:program gas:out ?out . gas:program gas:out1 ?depth . gas:program gas:maxIterations 10 . gas:program gas:maxVisited 100 . gas:program gas:linkType wdt:P279 . } SERVICE wikibase:label { bd:serviceParam wikibase:language "$lang" } ?out wdt:$property_for_this_type ?prop . } ORDER BY ?depth LIMIT $limit """ root@br-archive:~/openrefine-wikibase# cat config config_docker_old.py config_docker.py config.py config_wikidata.py root@br-archive:~/openrefine-wikibase# cat config.py """ This file defines a few constants which configure which Wikibase instance and which property/item ids should be used """ # Endpoint of the MediaWiki API of the Wikibase instance mediawiki_api_endpoint = 'https://daap.bannerrepeater.org/w/api.php' # SPARQL endpoint wikibase_sparql_endpoint = 'https://query.daap.bannerrepeater.org/' # Wikibase namespace ID, used to search for items # For Wikidata this is 0, but most by default Wikibase uses 120, which is the default Wikibase 'Item:' namespace # CHANGE THIS TO 120 if you are adapting this configuration file to another Wikibase wikibase_namespace_id = 120 # Namespace prefix of Wikibase items (including colon, e.g. 'Item:') wikibase_namespace_prefix = '' # User agent to connect to the Wikidata APIs user_agent = 'OpenRefine-Daap reconciliation interface' # Regexes and group ids to extracts Qids and Pids from URLs import re q_re = re.compile(r'(?') q_re_group_id = 3 p_re = re.compile(r'(?') p_re_group_id = 3 # Identifier space and schema space exposed to OpenRefine. # This should match the IRI prefixes used in RDF serialization. # Note that you should be careful about using http or https there, # because any variation will break comparisons at various places. identifier_space = 'http://daap.bannerrepeater.org/entity/' #"schema_space = 'http://www.wikidata.org/prop/direct/' schema_space = 'http://daap.bannerrepeater.org/wiki/Property:' # Pattern used to form the URL of a Qid. # This is only used for viewing so it is fine to use any protocol (therefore, preferably HTTPS if supported) qid_url_pattern = 'https://daap.bannerrepeater.org/wiki/Item:{{id}}' # By default, filter out any items which are instance # of a subclass of this class. # For Wikidata, this is "Wikimedia internal stuff". # This filters out the disambiguation pages, categories, ... # Set to None to disable this filter # avoid_items_of_class = 'Q17442446' avoid_items_of_class = None # Service name exposed at various places, # mainly in the list of reconciliation services of users service_name = 'DEV Daap' # URL (without the trailing slash) where this server runs this_host = 'http://116.203.73.138:8000' # The default limit on the number of results returned by us default_num_results = 25 # The maximum number of search results to retrieve from the Wikidata search API wd_api_max_search_results = 50 # need a bot account to get more # The matching score above which we should automatically match an item validation_threshold = 95 # Redis client used for caching at various places import redis redis_client = redis.Redis(host='redis', port=6379, db=0, decode_responses=True) # Redis prefix to use in front of all keys redis_key_prefix = 'openrefine_daap:' # Headers for the HTTP requests made by the tool headers = { 'User-Agent':service_name + ' (OpenRefine-Daap reconciliation service)', } # Previewing settings # Dimensions of the preview zoom_ratio = 1.0 preview_height = 100 preview_width = 400 # With which should be requested from Commons for the thumbnail thumbnail_width = 130 # All properties to use to get an image image_properties = [ 'P18', 'P14', 'P15', 'P158', 'P181', 'P242', 'P1766', 'P1801', 'P1846', 'P2713', 'P2716', 'P2910', 'P3311', 'P3383', 'P3451', 'P1621', 'P154', ] # URL pattern to retrieve an image from its filename image_download_pattern = 'https://upload.wikimedia.org/wikipedia/commons/thumb/%s/%s/%s/%dpx-%s' # Fallback URL of the image to use when previewing an item with no image fallback_image_url = this_host + '/static/wikidata.png' # Alt text of the fallback image fallback_image_alt = 'Daap' # Autodescribe endpoint to use. # this is used to generate automatic descriptions from item contents. # (disable this with: autodescribe_endpoint = None ) autodescribe_endpoint = None # Property proposal settings # Default type : entity (Q35120) default_type_entity = 'Q1' # Property path used to obtain the type of an item type_property_path = 'P31' # Property to follow to fetch properties for a given type property_for_this_type_property = 'P1963' # Optional prefix in front of properties in SPARQL-like property paths wdt_prefix = 'wdt:' # Sparql query used to fetch all the subclasses of a given item. # The '$qid' string will be replaced by the qid whose children should be fetched. sparql_query_to_fetch_subclasses = """ SELECT ?child WHERE { ?child wdt:P279* wd:$qid } """ # Sparql query used to fetch all the properties which store unique identifiers sparql_query_to_fetch_unique_id_properties = """ SELECT ?pid WHERE { ?pid wdt:P31/wdt:P279* wd:Q19847637 } """ # Sparql query used to propose properties to fetch for items of a given class sparql_query_to_propose_properties = """ SELECT ?prop ?propLabel ?depth WHERE { SERVICE gas:service { gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" . gas:program gas:in wd:$base_type . gas:program gas:out ?out . gas:program gas:out1 ?depth . gas:program gas:maxIterations 10 . gas:program gas:maxVisited 100 . gas:program gas:linkType wdt:P279 . } SERVICE wikibase:label { bd:serviceParam wikibase:language "$lang" } ?out wdt:$property_for_this_type ?prop . } ORDER BY ?depth LIMIT $limit """