index now generalized template publishing
This commit is contained in:
parent
d4f5aae657
commit
55fbdea410
@ -1,13 +1,22 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import json, os, re
|
import sys, json, re, os, urlparse
|
||||||
from urllib import urlencode
|
|
||||||
from urllib2 import urlopen, HTTPError, URLError
|
|
||||||
from jinja2 import FileSystemLoader, Environment
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from urllib import urlencode
|
||||||
|
from urllib2 import HTTPError
|
||||||
|
from jinja2 import FileSystemLoader, Environment
|
||||||
|
from common import *
|
||||||
|
from time import sleep
|
||||||
|
import dateutil.parser
|
||||||
|
|
||||||
|
"""
|
||||||
|
index:
|
||||||
|
Generate pages from etherdumps using a template.
|
||||||
|
|
||||||
|
Built-in templates: rss.xml, index.html
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def group (items, key=lambda x: x):
|
def group (items, key=lambda x: x):
|
||||||
ret = []
|
ret = []
|
||||||
@ -22,23 +31,68 @@ def group (items, key=lambda x: x):
|
|||||||
ret.append(keys[k])
|
ret.append(keys[k])
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
def base (x):
    """Strip a known etherdump suffix from the end of filename *x*.

    The recognized suffixes are ".raw.html", ".diff.html", ".meta.json" and
    ".raw.txt". A name that does not end in one of them is returned
    unchanged.
    """
    # The alternatives are grouped so that "$" anchors every one of them.
    # Ungrouped, only ".raw.txt" was anchored and the other suffixes were
    # removed from anywhere inside the name.
    return re.sub(r"\.(?:raw\.html|diff\.html|meta\.json|raw\.txt)$", "", x)
|
||||||
|
|
||||||
|
def excerpt (t, chars=25):
    """Return *t* shortened to its first *chars* characters.

    When *t* is longer than *chars*, the cut text is followed by "...";
    otherwise *t* is returned as is.
    """
    if len(t) <= chars:
        return t
    return t[:chars] + "..."
|
||||||
|
|
||||||
|
def absurl (url, base=None):
    """Return *url* unchanged if it is absolute, else prefixed with *base*.

    url  -- the link to resolve.
    base -- prefix for relative links; it is concatenated as is, so it
            should already end in "/". With no base (None or "") the
            relative url is returned unchanged.
    """
    # Test for the real scheme prefixes: a bare "http" test also matched
    # relative file names such as "httpd-notes.raw.html".
    if url.startswith(("http://", "https://")):
        return url
    # Without a base there is nothing to prepend (None + str would raise).
    if not base:
        return url
    return base + url
|
||||||
|
|
||||||
|
def url_base (url):
    """Return *url* with its last path component removed.

    Params, query and fragment are dropped. A non-empty result always ends
    in "/"; a url that has no directory part and no host (e.g. "feed.xml")
    gives "".
    """
    # URL paths always use "/", so split with posixpath instead of os.path,
    # whose separator depends on the platform.
    import posixpath

    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
    # Strip the leading "/" before splitting so a top-level file yields an
    # empty directory rather than "/", which would produce a doubled slash
    # once the trailing "/" is appended below.
    path, _ = posixpath.split(path.lstrip("/"))
    ret = urlparse.urlunparse((scheme, netloc, path, "", "", ""))
    if ret:
        ret += "/"
    return ret
|
||||||
|
|
||||||
def main (args):
|
def main (args):
|
||||||
p = ArgumentParser("")
|
p = ArgumentParser("Convert dumped files to a document via a template.")
|
||||||
|
|
||||||
p.add_argument("input", nargs="+", help="filenames")
|
p.add_argument("input", nargs="+", help="filenames")
|
||||||
p.add_argument("--templates", default=None, help="templates path")
|
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
|
||||||
|
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
|
||||||
|
p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||||
|
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
|
||||||
|
|
||||||
|
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
|
||||||
|
p.add_argument("--order", default="lastedited", help="order, possible values: padid, pad (no group name), lastedited (number of) authors, revisions, default: lastedited")
|
||||||
|
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
|
||||||
|
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
|
||||||
|
|
||||||
|
p.add_argument("--title", default="etherdump", help="title for document or rss feed channel title, default: etherdump")
|
||||||
|
p.add_argument("--description", default="", help="channel description, default: empty")
|
||||||
|
p.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
|
||||||
|
p.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
|
||||||
|
p.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
|
||||||
|
p.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
|
||||||
|
p.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
|
||||||
|
p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
|
||||||
|
|
||||||
|
p.add_argument("--content", default=False, action="store_true", help="rss: include content, default: False")
|
||||||
|
p.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
|
||||||
|
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
|
||||||
|
|
||||||
args = p.parse_args(args)
|
args = p.parse_args(args)
|
||||||
|
|
||||||
tmpath = args.templates
|
tmpath = args.templatepath
|
||||||
|
# Default path for template is the built-in data/templates
|
||||||
if tmpath == None:
|
if tmpath == None:
|
||||||
tmpath = os.path.split(os.path.abspath(__file__))[0]
|
tmpath = os.path.split(os.path.abspath(__file__))[0]
|
||||||
tmpath = os.path.split(tmpath)[0]
|
tmpath = os.path.split(tmpath)[0]
|
||||||
tmpath = os.path.join(tmpath, "data", "templates")
|
tmpath = os.path.join(tmpath, "data", "templates")
|
||||||
|
|
||||||
env = Environment(loader=FileSystemLoader(tmpath))
|
env = Environment(loader=FileSystemLoader(tmpath))
|
||||||
template = env.get_template("index.html")
|
env.filters["excerpt"] = excerpt
|
||||||
|
template = env.get_template(args.template)
|
||||||
|
|
||||||
def base (x):
|
info = loadpadinfo(args.padinfo)
|
||||||
return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
|
|
||||||
|
|
||||||
inputs = args.input
|
inputs = args.input
|
||||||
inputs.sort()
|
inputs.sort()
|
||||||
@ -50,23 +104,64 @@ def main(args):
|
|||||||
with open(p) as f:
|
with open(p) as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
inputs = map(loadmeta, inputs)
|
def fixdates (padmeta):
|
||||||
# sort by last edited (reverse)
|
d = dateutil.parser.parse(padmeta["lastedited_iso"])
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
padmeta["lastedited"] = d
|
||||||
print (template.render({"timestamp": timestamp, "pads": inputs}).encode("utf-8"))
|
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
|
||||||
|
return padmeta
|
||||||
|
|
||||||
# TODO: MODIFY THIS TO MAKE THE OUTPUT JOINABLE with the collected META DATA
|
pads = map(loadmeta, inputs)
|
||||||
# evt: how can the metadata become a GRAPH structure!!! with each output DOCUMENT
|
pads = map(fixdates, pads)
|
||||||
#
|
args.pads = pads
|
||||||
# print ("<ol>")
|
|
||||||
# for x in inputs:
|
|
||||||
# padid = x
|
|
||||||
# metapath = os.path.join(x, "{0}.meta.json".format(padid))
|
|
||||||
# if os.path.exists(metapath):
|
|
||||||
# print ("""<li><a href="{0}">{0}</a></li>""".format(x))
|
|
||||||
# with open(metapath) as f:
|
|
||||||
# meta = json.load(f)
|
|
||||||
# indexpath = os.path.join(x, "index.html")
|
|
||||||
# with open(indexpath, "w") as f:
|
|
||||||
|
|
||||||
# print ("</ol>")
|
# args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
|
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
|
||||||
|
if type(padurlbase) == unicode:
|
||||||
|
padurlbase = padurlbase.encode("utf-8")
|
||||||
|
args.siteurl = args.siteurl or padurlbase
|
||||||
|
args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
|
||||||
|
|
||||||
|
# order items & apply limit
|
||||||
|
if args.order == "lastedited":
|
||||||
|
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
|
||||||
|
elif args.order == "pad":
|
||||||
|
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
|
||||||
|
elif args.order == "padid":
|
||||||
|
args.pads.sort(key=lambda x: x.get("padid"), reverse=args.reverse)
|
||||||
|
elif args.order == "revisions":
|
||||||
|
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
|
||||||
|
elif args.order == "authors":
|
||||||
|
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
|
||||||
|
else:
|
||||||
|
raise Exception("That ordering is not implemented!")
|
||||||
|
|
||||||
|
if args.limit:
|
||||||
|
args.pads = args.pads[:args.limit]
|
||||||
|
|
||||||
|
# add versions_by_type, add in full text
|
||||||
|
# add link (based on args.link)
|
||||||
|
linkversions = args.link.split(",")
|
||||||
|
linkbase = args.linkbase or url_base(args.feedurl)
|
||||||
|
# print ("linkbase", linkbase, args.linkbase, args.feedurl)
|
||||||
|
|
||||||
|
for p in pads:
|
||||||
|
versions_by_type = {}
|
||||||
|
p["versions_by_type"] = versions_by_type
|
||||||
|
for v in p["versions"]:
|
||||||
|
t = v["type"]
|
||||||
|
versions_by_type[t] = v
|
||||||
|
with open (versions_by_type["text"]["path"]) as f:
|
||||||
|
p["text"] = f.read().decode("utf-8")
|
||||||
|
|
||||||
|
# ADD IN LINK
|
||||||
|
for v in linkversions:
|
||||||
|
vdata = versions_by_type[v]
|
||||||
|
try:
|
||||||
|
if v == "pad" or os.path.exists(vdata["path"]):
|
||||||
|
p["link"] = absurl(vdata["url"], linkbase)
|
||||||
|
break
|
||||||
|
except KeyError as e:
|
||||||
|
pass
|
||||||
|
|
||||||
|
print (template.render(vars(args)).encode("utf-8"))
|
||||||
|
@ -1,157 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
from __future__ import print_function
|
|
||||||
from argparse import ArgumentParser
|
|
||||||
import sys, json, re, os, urlparse
|
|
||||||
from datetime import datetime
|
|
||||||
from urllib import urlencode
|
|
||||||
from urllib2 import HTTPError
|
|
||||||
from jinja2 import FileSystemLoader, Environment
|
|
||||||
from common import *
|
|
||||||
from time import sleep
|
|
||||||
import dateutil.parser
|
|
||||||
|
|
||||||
"""
|
|
||||||
rss:
|
|
||||||
Generate an RSS feed from an etherdump.
|
|
||||||
|
|
||||||
|
|
||||||
TODO NEXT
|
|
||||||
add back limit and ordering parameters to create filters to make a latest changes feed!
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def group (items, key=lambda x: x):
    """Partition *items* into lists of items that share the same key.

    key(item) is computed for every item. The result is a list of groups
    ordered by key, and each group is itself sorted.
    """
    buckets = {}
    for entry in items:
        buckets.setdefault(key(entry), []).append(entry)
    return [sorted(buckets[name]) for name in sorted(buckets)]
|
|
||||||
|
|
||||||
def base (x):
    """Strip a known etherdump suffix from the end of filename *x*.

    The recognized suffixes are ".raw.html", ".diff.html", ".meta.json" and
    ".raw.txt". A name that does not end in one of them is returned
    unchanged.
    """
    # The alternatives are grouped so that "$" anchors every one of them.
    # Ungrouped, only ".raw.txt" was anchored and the other suffixes were
    # removed from anywhere inside the name.
    return re.sub(r"\.(?:raw\.html|diff\.html|meta\.json|raw\.txt)$", "", x)
|
|
||||||
|
|
||||||
def excerpt (t, chars=25):
    """Return *t* shortened to its first *chars* characters.

    When *t* is longer than *chars*, the cut text is followed by "...";
    otherwise *t* is returned as is.
    """
    if len(t) <= chars:
        return t
    return t[:chars] + "..."
|
|
||||||
|
|
||||||
def absurl (url, base=None):
    """Return *url* unchanged if it is absolute, else prefixed with *base*.

    url  -- the link to resolve.
    base -- prefix for relative links; it is concatenated as is, so it
            should already end in "/". With no base (None or "") the
            relative url is returned unchanged.
    """
    # Test for the real scheme prefixes: a bare "http" test also matched
    # relative file names such as "httpd-notes.raw.html".
    if url.startswith(("http://", "https://")):
        return url
    # Without a base there is nothing to prepend (None + str would raise).
    if not base:
        return url
    return base + url
|
|
||||||
|
|
||||||
def url_base (url):
    """Return *url* with its last path component removed.

    Params, query and fragment are dropped. A non-empty result always ends
    in "/"; a url that has no directory part and no host (e.g. "feed.xml")
    gives "".
    """
    # URL paths always use "/", so split with posixpath instead of os.path,
    # whose separator depends on the platform.
    import posixpath

    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
    # Strip the leading "/" before splitting so a top-level file yields an
    # empty directory rather than "/", which would produce a doubled slash
    # once the trailing "/" is appended below.
    path, _ = posixpath.split(path.lstrip("/"))
    ret = urlparse.urlunparse((scheme, netloc, path, "", "", ""))
    if ret:
        ret += "/"
    return ret
|
|
||||||
|
|
||||||
def main (args):
    """Render the "rss.xml" template for a set of dumped pad files.

    args -- list of command-line argument strings (handed to argparse).

    The input filenames are grouped per pad (see base/group), each group's
    .meta.json is loaded, the pads are ordered by "lastedited_iso" and cut
    to --limit, each remaining pad gets its text and a link attached, and
    the rendered template is printed to stdout encoded as UTF-8.

    NOTE(review): --zerorevs, --skip, --type and --content are parsed but
    not used in this function; they are still passed to the template via
    vars(args).
    """
    p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")

    p.add_argument("input", nargs="+", help="filenames")
    p.add_argument("--templates", default=None, help="templates path")

    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")

    p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
    p.add_argument("--type", default="recentchanges", help="type of feed, default: recentchanges")

    p.add_argument("--limit", type=int, default=10, help="number of items, default: 10")
    p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False (reverse chrono)")

    p.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad")
    p.add_argument("--description", default="", help="channel description, default: empty")
    p.add_argument("--language", default="en-US", help="feed language, default: en-US")
    p.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
    p.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1")
    p.add_argument("--siteurl", default=None, help="to use as channel's site link, default: the etherpad url")
    p.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml")
    p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")

    p.add_argument("--content", default=False, action="store_true", help="include content, default: False")
    p.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
    p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")

    args = p.parse_args(args)

    tmpath = args.templates
    if tmpath is None:
        # Default: "data/templates" next to the directory holding this file.
        tmpath = os.path.split(os.path.abspath(__file__))[0]
        tmpath = os.path.split(tmpath)[0]
        tmpath = os.path.join(tmpath, "data", "templates")

    env = Environment(loader=FileSystemLoader(tmpath))
    env.filters["excerpt"] = excerpt
    template = env.get_template("rss.xml")

    info = loadpadinfo(args.padinfo)

    inputs = args.input
    inputs.sort()
    # One list of filenames per pad.
    inputs = group(inputs, base)

    def loadmeta(paths):
        """Return the parsed .meta.json found in *paths*, or None if there is none."""
        for path in paths:
            if path.endswith(".meta.json"):
                with open(path) as f:
                    return json.load(f)
        return None

    def fixdates (padmeta):
        """Add parsed ("lastedited") and RFC 822 ("lastedited_822") dates to *padmeta*."""
        d = dateutil.parser.parse(padmeta["lastedited_iso"])
        padmeta["lastedited"] = d
        padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
        return padmeta

    # Groups without a .meta.json carry no metadata to publish; skip them
    # instead of crashing in fixdates on a None entry.
    metas = [loadmeta(paths) for paths in inputs]
    pads = [fixdates(meta) for meta in metas if meta is not None]
    args.pads = pads

    # args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Derive the public pad url from the api url (dots escaped so that only
    # the literal version string matches).
    padurlbase = re.sub(r"api/1\.2\.9/$", "p/", info["apiurl"])
    if isinstance(padurlbase, unicode):
        padurlbase = padurlbase.encode("utf-8")
    args.siteurl = args.siteurl or padurlbase
    args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    # order items & apply limit
    args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=not args.chronological)
    if args.limit:
        args.pads = args.pads[:args.limit]

    # add versions_by_type, add in full text
    # add link (based on args.link)
    linkversions = args.link.split(",")
    linkbase = args.linkbase or url_base(args.feedurl)
    # print ("linkbase", linkbase, args.linkbase, args.feedurl)

    # Only the pads that survived the limit are rendered, so only those
    # need their text read from disk.
    for pad in args.pads:
        versions_by_type = {}
        pad["versions_by_type"] = versions_by_type
        for v in pad["versions"]:
            versions_by_type[v["type"]] = v
        with open (versions_by_type["text"]["path"]) as f:
            pad["text"] = f.read().decode("utf-8")

        # ADD IN LINK
        for v in linkversions:
            try:
                # The lookup belongs inside the try: a pad that lacks this
                # version type must fall through to the next candidate
                # ("use first avail") rather than abort with KeyError.
                vdata = versions_by_type[v]
                if v == "pad" or os.path.exists(vdata["path"]):
                    pad["link"] = absurl(vdata["url"], linkbase)
                    break
            except KeyError:
                pass

    print (template.render(vars(args)).encode("utf-8"))
|
|
Loading…
Reference in New Issue
Block a user