Pumping pads as files into publishing frameworks!
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

185 lines
7.6 KiB

from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
try:
# python2
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
from urlparse import urlparse, urlunparse
except ImportError:
# python3
from urllib.parse import urlparse, urlunparse, urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from jinja2 import FileSystemLoader, Environment
from etherdump.commands.common import *
from time import sleep
import dateutil.parser
"""
index:
Generate pages from etherdumps using a template.
Built-in templates: rss.xml, index.html
"""
def group (items, key=lambda x: x):
ret = []
keys = {}
for item in items:
k = key(item)
if k not in keys:
keys[k] = []
keys[k].append(item)
for k in sorted(keys):
keys[k].sort()
ret.append(keys[k])
return ret
def base (x):
return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def excerpt (t, chars=25):
if len(t) > chars:
t = t[:chars] + "..."
return t
def absurl (url, base=None):
if not url.startswith("http"):
return base + url
return url
def url_base (url):
(scheme, netloc, path, params, query, fragment) = urlparse(url)
path, _ = os.path.split(path.lstrip("/"))
ret = urlunparse((scheme, netloc, path, None, None, None))
if ret:
ret += "/"
return ret
def main (args):
p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="filenames (uses .meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: ./.etherdump/settings.json")
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
p.add_argument("--output", default=None, help="output, default: stdout")
pg = p.add_argument_group('template variables')
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
pg.add_argument("--title", default="etherdump", help="title for document or rss feed channel title, default: etherdump")
pg.add_argument("--description", default="", help="rss: channel description, default: empty")
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
pg.add_argument("--next", default=None, help="next link, default: None)")
pg.add_argument("--prev", default=None, help="prev link, default: None")
args = p.parse_args(args)
tmpath = args.templatepath
# Default path for template is the built-in data/templates
if tmpath == None:
tmpath = os.path.split(os.path.abspath(__file__))[0]
tmpath = os.path.split(tmpath)[0]
tmpath = os.path.join(tmpath, "data", "templates")
env = Environment(loader=FileSystemLoader(tmpath))
env.filters["excerpt"] = excerpt
template = env.get_template(args.template)
info = loadpadinfo(args.padinfo)
inputs = args.input
inputs.sort()
inputs = group(inputs, base)
def loadmeta(paths):
for p in paths:
if p.endswith(".meta.json"):
with open(p) as f:
return json.load(f)
def fixdates (padmeta):
d = dateutil.parser.parse(padmeta["lastedited_iso"])
padmeta["lastedited"] = d
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
return padmeta
pads = map(loadmeta, inputs)
pads = map(fixdates, pads)
args.pads = list(pads)
if args.timestamp == None:
args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
# if type(padurlbase) == unicode:
# padurlbase = padurlbase.encode("utf-8")
args.siteurl = args.siteurl or padurlbase
args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
# order items & apply limit
if args.order == "lastedited":
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
elif args.order == "pad":
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
elif args.order == "padid":
args.pads.sort(key=lambda x: x.get("padid"), reverse=args.reverse)
elif args.order == "revisions":
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
elif args.order == "authors":
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
else:
raise Exception("That ordering is not implemented!")
if args.limit:
args.pads = args.pads[:args.limit]
# add versions_by_type, add in full text
# add link (based on args.link)
linkversions = args.link.split(",")
linkbase = args.linkbase or url_base(args.feedurl)
# print ("linkbase", linkbase, args.linkbase, args.feedurl)
for p in args.pads:
versions_by_type = {}
p["versions_by_type"] = versions_by_type
for v in p["versions"]:
t = v["type"]
versions_by_type[t] = v
with open (versions_by_type["text"]["path"]) as f:
p["text"] = f.read()
# ADD IN LINK
for v in linkversions:
vdata = versions_by_type[v]
try:
if v == "pad" or os.path.exists(vdata["path"]):
p["link"] = absurl(vdata["url"], linkbase)
break
except KeyError as e:
pass
if args.output:
with open(args.output, "w") as f:
print (template.render(vars(args)), file=f)
else:
print (template.render(vars(args)))