forked from varia/varia.website
176 lines
6.5 KiB
Python
176 lines
6.5 KiB
Python
from pelican.readers import BaseReader
|
|
from pelican.generators import CachingGenerator
|
|
from pelican.contents import Page, is_valid_content
|
|
from pelican import signals
|
|
import logging
|
|
from blinker import signal
|
|
import requests
|
|
from os import listdir
|
|
from os.path import isfile, join
|
|
|
|
"""
|
|
pelican-rdf
|
|
===============
|
|
|
|
This plugin integrates to pelican a new type of media, the vocabulary.
|
|
Vocabularies are .rdf or .owl files, and metadata about them is collected
|
|
through sparql queries.
|
|
"""
|
|
|
|
|
|
try:
|
|
import rdflib
|
|
from rdflib.query import Processor
|
|
rdflib_loaded=True
|
|
except ImportError:
|
|
rdflib_loaded=False
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
voc_generator_init = signal('voc_generator_init')
|
|
voc_generator_finalized = signal('voc_generator_finalized')
|
|
voc_writer_finalized = signal('voc_writer_finalized')
|
|
voc_generator_preread = signal('voc_generator_preread')
|
|
voc_generator_context = signal('voc_generator_context')
|
|
|
|
class VocabularyGenerator(CachingGenerator):
|
|
"""Generate vocabulary descriptions"""
|
|
|
|
# temporary file where the vocabulary is dereferenced to
|
|
# when collected online
|
|
_local_vocabulary_path = "/tmp/"
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
logger.debug("Vocabulary generator called")
|
|
self.vocabularies =[]
|
|
super(VocabularyGenerator, self).__init__(*args, **kwargs)
|
|
|
|
# Called both for local and remote vocabulary context creation.
|
|
# Performs the actual Vocabulary generation.
|
|
def generate_vocabulary_context(
|
|
self, vocabulary_file_name, path_to_vocabulary):
|
|
logger.debug("Generating__ vocabulary context for "+
|
|
path_to_vocabulary+"/"+vocabulary_file_name)
|
|
voc = self.get_cached_data(vocabulary_file_name, None)
|
|
if voc is None:
|
|
try:
|
|
voc = self.readers.read_file(
|
|
base_path=path_to_vocabulary,
|
|
path=vocabulary_file_name,
|
|
content_class=Vocabulary,
|
|
context=self.context,
|
|
preread_signal=voc_generator_preread,
|
|
preread_sender=self,
|
|
context_signal=voc_generator_context,
|
|
context_sender=self)
|
|
except Exception as e:
|
|
logger.error(
|
|
'Could not process %s\n%s', vocabulary_file_name, e,
|
|
exc_info=self.settings.get('DEBUG', False))
|
|
self._add_failed_source_path(vocabulary_file_name)
|
|
|
|
if not is_valid_content(voc, vocabulary_file_name):
|
|
self._add_failed_source_path(vocabulary_file_name)
|
|
|
|
self.cache_data(vocabulary_file_name, voc)
|
|
self.vocabularies.append(voc)
|
|
self.add_source_path(voc)
|
|
|
|
|
|
def generate_local_context(self):
|
|
for f in self.get_files(
|
|
self.settings['VOC_PATHS'],
|
|
exclude=self.settings['VOC_EXCLUDES']):
|
|
self.generate_vocabulary_context(f, self.path)
|
|
|
|
def dereference(self, uri, local_file):
|
|
logger.debug("Dereferencing "+uri+" into "+local_file)
|
|
headers={"Accept":"application/rdf+xml"}
|
|
r = requests.get(uri, headers=headers)
|
|
with open(self._local_vocabulary_path+local_file, 'w') as f:
|
|
f.write(r.text)
|
|
|
|
def generate_remote_context(self):
|
|
for uri in self.settings["VOC_URIS"]:
|
|
logger.debug("Generating context for remote "+uri)
|
|
local_name = uri.split("/")[-1]+".rdf"
|
|
self.dereference(uri, local_name)
|
|
self.generate_vocabulary_context(
|
|
local_name,
|
|
self._local_vocabulary_path)
|
|
|
|
def generate_context(self):
|
|
self.generate_local_context()
|
|
self.generate_remote_context()
|
|
self._update_context(('vocabularies',))
|
|
self.save_cache()
|
|
self.readers.save_cache()
|
|
|
|
def generate_output(self, writer):
|
|
for voc in self.vocabularies:
|
|
writer.write_file(
|
|
voc.save_as, self.get_template(voc.template),
|
|
self.context, voc=voc,
|
|
relative_urls=self.settings['RELATIVE_URLS'],
|
|
override_output=hasattr(voc, 'override_save_as'))
|
|
voc_writer_finalized.send(self, writer=writer)
|
|
|
|
class RdfReader(BaseReader):
|
|
|
|
file_extensions = ['rdf', 'owl']
|
|
enabled = bool(rdflib_loaded)
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
super(RdfReader, self).__init__(*args, **kwargs)
|
|
|
|
def read(self, source_path):
|
|
"""Parse content and metadata of an rdf file"""
|
|
logger.debug("Loading graph described in "+source_path)
|
|
graph = rdflib.Graph()
|
|
graph.load(source_path)
|
|
meta = {}
|
|
queries = [
|
|
f for f in listdir(self.settings["VOC_QUERIES_PATH"])
|
|
if (isfile(join(self.settings["VOC_QUERIES_PATH"], f))
|
|
and f.endswith(".sparql"))]
|
|
for query_path in queries:
|
|
query_file_path = self.settings["VOC_QUERIES_PATH"]+"/"+query_path
|
|
with open(query_file_path, "r") as query_file:
|
|
query = query_file.read()
|
|
|
|
# The name of the query identifies the elements in the context
|
|
query_key=query_path.split(".")[0]
|
|
result_set = graph.query(query)
|
|
# Each query result will be stored as a dictionnary in the
|
|
# vocabulary context, referenced by the query name as its key.
|
|
# Multiple results are stored in a list.
|
|
for result in result_set:
|
|
if not query_key in meta.keys():
|
|
meta[query_key]=result.asdict()
|
|
elif type(meta[query_key]) == list:
|
|
meta[query_key].append(result.asdict())
|
|
else:
|
|
meta[query_key]=[meta[query_key], result.asdict()]
|
|
meta["iri"] = meta["lov_metadata"]["iri"]
|
|
meta["description"] = meta["lov_metadata"]["description"]
|
|
meta["version"] = meta["lov_metadata"]["version"]
|
|
meta["title"] = meta["lov_metadata"]["title"]
|
|
return "", meta
|
|
|
|
class Vocabulary(Page):
|
|
mandatory_properties = ('iri','description','version', 'title')
|
|
default_template = 'vocabulary'
|
|
|
|
def add_reader(readers):
|
|
for ext in RdfReader.file_extensions:
|
|
readers.reader_classes[ext] = RdfReader
|
|
|
|
def add_generator(pelican_object):
|
|
print("Adding the generator")
|
|
return VocabularyGenerator
|
|
|
|
|
|
def register():
|
|
signals.get_generators.connect(add_generator)
|
|
signals.readers_init.connect(add_reader)
|