varia.website/plugins/pelican-rdf/pelican_rdf.py

176 lines
6.5 KiB
Python
Raw Normal View History

2018-02-01 14:46:15 +01:00
from pelican.readers import BaseReader
from pelican.generators import CachingGenerator
from pelican.contents import Page, is_valid_content
from pelican import signals
import logging
from blinker import signal
import requests
from os import listdir
from os.path import isfile, join
"""
pelican-rdf
===============
This plugin adds a new type of media to Pelican: the vocabulary.
Vocabularies are .rdf or .owl files, and metadata about them is collected
through SPARQL queries.
"""
# rdflib is an optional dependency: remember whether it could be imported so
# that the RDF reader can be switched off when it is missing.
try:
    import rdflib
    from rdflib.query import Processor
except ImportError:
    rdflib_loaded = False
else:
    rdflib_loaded = True
logger = logging.getLogger(__name__)

# Named blinker signals exposed by this plugin.
# Generator-related signals:
voc_generator_init = signal('voc_generator_init')
voc_generator_preread = signal('voc_generator_preread')
voc_generator_context = signal('voc_generator_context')
voc_generator_finalized = signal('voc_generator_finalized')
# Writer-related signal:
voc_writer_finalized = signal('voc_writer_finalized')
class VocabularyGenerator(CachingGenerator):
    """Generate vocabulary descriptions.

    Vocabularies come from two places: local files found under the
    ``VOC_PATHS`` setting (minus ``VOC_EXCLUDES``), and remote documents
    listed in the ``VOC_URIS`` setting, which are first downloaded into
    ``_local_vocabulary_path``. Each of these settings is optional and
    defaults to an empty collection.
    """

    # Directory where a remote vocabulary is dereferenced to before being
    # read like a local file.
    _local_vocabulary_path = "/tmp/"

    # Seconds to wait for a remote server before giving up on a vocabulary,
    # so that one unresponsive host cannot hang the whole site build.
    _dereference_timeout = 30

    def __init__(self, *args, **kwargs):
        logger.debug("Vocabulary generator called")
        # Filled by generate_context(), consumed by generate_output().
        self.vocabularies = []
        super(VocabularyGenerator, self).__init__(*args, **kwargs)

    def generate_vocabulary_context(
            self, vocabulary_file_name, path_to_vocabulary):
        """Read one vocabulary file and register it in ``self.vocabularies``.

        Called both for local and remote vocabulary context creation.
        The cached object is reused when available; otherwise the file is
        read, validated and cached.

        :param vocabulary_file_name: path of the file, relative to
            ``path_to_vocabulary``; also used as the cache key.
        :param path_to_vocabulary: base directory containing the file.

        A file that cannot be read, or whose content is not valid, is
        recorded as a failed source path and skipped.
        """
        logger.debug(
            "Generating__ vocabulary context for %s/%s",
            path_to_vocabulary, vocabulary_file_name)
        voc = self.get_cached_data(vocabulary_file_name, None)
        if voc is None:
            try:
                voc = self.readers.read_file(
                    base_path=path_to_vocabulary,
                    path=vocabulary_file_name,
                    content_class=Vocabulary,
                    context=self.context,
                    preread_signal=voc_generator_preread,
                    preread_sender=self,
                    context_signal=voc_generator_context,
                    context_sender=self)
            except Exception as e:
                logger.error(
                    'Could not process %s\n%s', vocabulary_file_name, e,
                    exc_info=self.settings.get('DEBUG', False))
                self._add_failed_source_path(vocabulary_file_name)
                # Nothing was read: do not validate, cache or publish None.
                return

            if not is_valid_content(voc, vocabulary_file_name):
                self._add_failed_source_path(vocabulary_file_name)
                # Invalid content must not be cached nor written out.
                return

            self.cache_data(vocabulary_file_name, voc)
        self.vocabularies.append(voc)
        self.add_source_path(voc)

    def generate_local_context(self):
        """Generate the context of every local vocabulary file."""
        for f in self.get_files(
                self.settings.get('VOC_PATHS', []),
                exclude=self.settings.get('VOC_EXCLUDES', [])):
            self.generate_vocabulary_context(f, self.path)

    def dereference(self, uri, local_file):
        """Download the RDF/XML document at ``uri``.

        :param uri: address of the remote vocabulary.
        :param local_file: name of the file to create inside
            ``_local_vocabulary_path``.
        :raises requests.RequestException: on network failure, timeout or
            HTTP error status.
        """
        logger.debug("Dereferencing %s into %s", uri, local_file)
        headers = {"Accept": "application/rdf+xml"}
        r = requests.get(
            uri, headers=headers, timeout=self._dereference_timeout)
        # Do not store an HTML error page as if it were a vocabulary.
        r.raise_for_status()
        # Write the raw bytes: re-encoding the decoded text could contradict
        # the encoding announced in the XML declaration of the document.
        with open(join(self._local_vocabulary_path, local_file), 'wb') as f:
            f.write(r.content)

    def generate_remote_context(self):
        """Download each URI of ``VOC_URIS`` and generate its context.

        A vocabulary that cannot be downloaded is logged and skipped so that
        the remaining ones are still processed.
        """
        for uri in self.settings.get("VOC_URIS", []):
            logger.debug("Generating context for remote %s", uri)
            # Ignore trailing slashes, otherwise the last segment is empty
            # and the local file would be named ".rdf".
            # NOTE(review): two URIs sharing the same last segment still map
            # to the same local file and cache key.
            local_name = uri.rstrip("/").split("/")[-1] + ".rdf"
            try:
                self.dereference(uri, local_name)
            except (requests.RequestException, IOError) as e:
                logger.error(
                    'Could not dereference %s\n%s', uri, e,
                    exc_info=self.settings.get('DEBUG', False))
                continue
            self.generate_vocabulary_context(
                local_name,
                self._local_vocabulary_path)

    def generate_context(self):
        """Collect local then remote vocabularies and publish them.

        The resulting list is exposed to the shared context under the
        ``vocabularies`` key, then the generator and reader caches are
        saved.
        """
        self.generate_local_context()
        self.generate_remote_context()
        self._update_context(('vocabularies',))
        self.save_cache()
        self.readers.save_cache()

    def generate_output(self, writer):
        """Write one output file per vocabulary using ``writer``.

        The ``voc_writer_finalized`` signal is sent once all vocabularies
        have been written.
        """
        for voc in self.vocabularies:
            writer.write_file(
                voc.save_as, self.get_template(voc.template),
                self.context, voc=voc,
                relative_urls=self.settings['RELATIVE_URLS'],
                override_output=hasattr(voc, 'override_save_as'))
        voc_writer_finalized.send(self, writer=writer)
class RdfReader(BaseReader):
    """Reader for vocabularies stored as RDF/XML (.rdf or .owl files).

    The metadata of a vocabulary is collected by running every ``.sparql``
    file found in the ``VOC_QUERIES_PATH`` setting against its graph.
    """

    file_extensions = ['rdf', 'owl']
    # The reader is only usable when rdflib could be imported.
    enabled = bool(rdflib_loaded)

    def __init__(self, *args, **kwargs):
        super(RdfReader, self).__init__(*args, **kwargs)

    def read(self, source_path):
        """Parse content and metadata of an rdf file.

        :param source_path: path of the RDF/XML file to load.
        :returns: a ``(content, metadata)`` pair. The content is always an
            empty string; the metadata maps each query name to its result(s)
            and also exposes ``iri``, ``description``, ``version`` and
            ``title`` taken from the ``lov_metadata`` query.
        :raises KeyError: if the ``lov_metadata`` query is missing, returned
            no result, or did not bind one of the four properties above.
        :raises TypeError: if the ``lov_metadata`` query returned more than
            one result.
        """
        logger.debug("Loading graph described in %s", source_path)
        graph = rdflib.Graph()
        # Graph.load() was a thin wrapper around parse(format="xml") and no
        # longer exists in recent rdflib releases.
        graph.parse(source_path, format="xml")
        meta = {}
        queries_dir = self.settings["VOC_QUERIES_PATH"]
        # Sorted so that the processing order does not depend on the order
        # in which the file system lists the directory.
        query_names = sorted(
            f for f in listdir(queries_dir)
            if isfile(join(queries_dir, f)) and f.endswith(".sparql"))
        for query_name in query_names:
            with open(join(queries_dir, query_name), "r") as query_file:
                query = query_file.read()
            # The name of the query identifies the elements in the context
            query_key = query_name.split(".")[0]
            # Each query result will be stored as a dictionary in the
            # vocabulary context, referenced by the query name as its key.
            # Multiple results are stored in a list.
            for result in graph.query(query):
                row = result.asdict()
                if query_key not in meta:
                    meta[query_key] = row
                elif isinstance(meta[query_key], list):
                    meta[query_key].append(row)
                else:
                    meta[query_key] = [meta[query_key], row]

        lov_metadata = meta.get("lov_metadata")
        if lov_metadata is None:
            raise KeyError(
                "lov_metadata: the query is missing or returned no result "
                "for " + source_path)
        if isinstance(lov_metadata, list):
            raise TypeError(
                "the lov_metadata query returned several results for "
                + source_path + ", exactly one is expected")
        # Promote the mandatory vocabulary properties to top-level metadata.
        for prop in ("iri", "description", "version", "title"):
            meta[prop] = lov_metadata[prop]
        return "", meta
class Vocabulary(Page):
    """Page subclass describing a vocabulary.

    It declares its own mandatory properties and is rendered with the
    ``vocabulary`` template by default.
    """

    default_template = 'vocabulary'
    mandatory_properties = (
        'iri',
        'description',
        'version',
        'title',
    )
def add_reader(readers):
    """Map every extension handled by RdfReader to that reader class."""
    registry = readers.reader_classes
    for extension in RdfReader.file_extensions:
        registry[extension] = RdfReader
def add_generator(pelican_object):
    """Return the generator class this plugin contributes to Pelican.

    :param pelican_object: sender of the signal; unused.
    :returns: the VocabularyGenerator class.
    """
    # Use the module logger like the rest of the plugin rather than writing
    # to stdout on every build.
    logger.debug("Adding the generator")
    return VocabularyGenerator
def register():
    """Connect the plugin callbacks to the Pelican signals they handle."""
    hooks = (
        (signals.get_generators, add_generator),
        (signals.readers_init, add_reader),
    )
    for hook, callback in hooks:
        hook.connect(callback)