Michael Murtaugh
8 years ago
2 changed files with 126 additions and 188 deletions
@ -1,157 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
from __future__ import print_function |
|
||||
from argparse import ArgumentParser |
|
||||
import sys, json, re, os, urlparse |
|
||||
from datetime import datetime |
|
||||
from urllib import urlencode |
|
||||
from urllib2 import HTTPError |
|
||||
from jinja2 import FileSystemLoader, Environment |
|
||||
from common import * |
|
||||
from time import sleep |
|
||||
import dateutil.parser |
|
||||
|
|
||||
""" |
|
||||
rss: |
|
||||
Generate an RSS feed from an etherdump. |
|
||||
|
|
||||
|
|
||||
TODO NEXT |
|
||||
add back limit and ordering parameters to create filters to make a latest changes feed! |
|
||||
|
|
||||
""" |
|
||||
|
|
||||
def group (items, key=lambda x: x):
    """Bucket items by key(item).

    Returns a list of buckets (each a sorted list of items), ordered by
    ascending bucket key. Default key is identity.
    """
    buckets = {}
    for element in items:
        buckets.setdefault(key(element), []).append(element)
    return [sorted(buckets[k]) for k in sorted(buckets)]
|
||||
|
|
||||
def base (x):
    """Strip a known etherdump suffix (.raw.html, .diff.html, .meta.json,
    .raw.txt) from the END of filename x, returning the pad's base name.

    Fix: in the original pattern the alternatives were grouped as
    (a)|(b)|(c)|(d$), so `$` anchored only the last alternative and the
    other suffixes were removed anywhere in the string. Group the
    alternation so `$` applies to every suffix.
    """
    return re.sub(r"(\.raw\.html|\.diff\.html|\.meta\.json|\.raw\.txt)$", "", x)
|
||||
|
|
||||
def excerpt (t, chars=25):
    """Return t truncated to at most chars characters, with "..." appended
    when truncation happened; t is returned unchanged otherwise."""
    return t[:chars] + "..." if len(t) > chars else t
|
||||
|
|
||||
def absurl (url, base=None):
    """Return url as-is when it is already absolute (starts with "http");
    otherwise prefix it with base to absolutize it."""
    return url if url.startswith("http") else base + url
|
||||
|
|
||||
def url_base (url):
    """Drop the last path segment (and any params/query/fragment) from url,
    returning the parent URL with a trailing slash, or "" for a bare path.

    NOTE(review): uses the Python 2 `urlparse` module (see file imports).
    """
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    parent, _tail = os.path.split(path.lstrip("/"))
    result = urlparse.urlunparse((scheme, netloc, parent, None, None, None))
    if not result:
        return result
    return result + "/"
|
||||
|
|
||||
def main (args):
    """Entry point for the `rss` subcommand.

    Reads the .meta.json files of an etherdump, orders pads by last-edit
    time, and prints an RSS feed rendered from the Jinja "rss.xml" template
    to stdout.

    NOTE(review): this is Python 2 code (`unicode`, `str.decode`, list-returning
    `map`); see also the py2-only `urlparse`/`urllib2` imports at file top.
    """
    p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")

    p.add_argument("input", nargs="+", help="filenames")
    p.add_argument("--templates", default=None, help="templates path")

    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")

    p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
    p.add_argument("--type", default="recentchanges", help="type of feed, default: recentchanges")

    p.add_argument("--limit", type=int, default=10, help="number of items, default: 10")
    p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False (reverse chrono)")

    p.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad")
    p.add_argument("--description", default="", help="channel description, default: empty")
    p.add_argument("--language", default="en-US", help="feed language, default: en-US")
    p.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
    p.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1")
    p.add_argument("--siteurl", default=None, help="to use as channel's site link, default: the etherpad url")
    p.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml")
    p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")

    p.add_argument("--content", default=False, action="store_true", help="include content, default: False")
    p.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
    p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")

    args = p.parse_args(args)

    # Default template path: <package dir>/../data/templates, derived from
    # this module's own location.
    tmpath = args.templates
    if tmpath == None:
        tmpath = os.path.split(os.path.abspath(__file__))[0]
        tmpath = os.path.split(tmpath)[0]
        tmpath = os.path.join(tmpath, "data", "templates")

    env = Environment(loader=FileSystemLoader(tmpath))
    env.filters["excerpt"] = excerpt  # expose excerpt() as a Jinja filter
    template = env.get_template("rss.xml")

    # loadpadinfo comes from `common` (star import at file top).
    info = loadpadinfo(args.padinfo)

    # Group sibling files (pad.meta.json / pad.raw.txt / pad.diff.html ...)
    # into one list per pad, keyed by the suffix-stripped base name.
    inputs = args.input
    inputs.sort()
    inputs = group(inputs, base)

    def loadmeta(paths):
        # Return the parsed .meta.json from a pad's file group.
        # NOTE(review): implicitly returns None when the group has no
        # .meta.json file; fixdates would then raise — TODO confirm inputs
        # always include the .meta.json.
        for p in paths:
            if p.endswith(".meta.json"):
                with open(p) as f:
                    return json.load(f)

    def fixdates (padmeta):
        # Parse the pad's ISO last-edited timestamp and add both a datetime
        # ("lastedited") and an RFC 822 string ("lastedited_822") for RSS.
        d = dateutil.parser.parse(padmeta["lastedited_iso"])
        padmeta["lastedited"] = d
        padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
        return padmeta

    # Python 2 map returns lists here; pads is mutated in place below and
    # also exposed to the template via args.pads.
    pads = map(loadmeta, inputs)
    pads = map(fixdates, pads)
    args.pads = pads

    # args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Derive the pad front-end URL base from the API URL
    # (".../api/1.2.9/" -> ".../p/").
    padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
    if type(padurlbase) == unicode:  # py2: template vars kept as utf-8 bytes
        padurlbase = padurlbase.encode("utf-8")
    args.siteurl = args.siteurl or padurlbase
    # Channel build date, RFC 822 format in UTC.
    args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    # order items & apply limit
    args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=not args.chronological)
    if args.limit:
        args.pads = args.pads[:args.limit]

    # add versions_by_type, add in full text
    # add link (based on args.link)
    linkversions = args.link.split(",")
    linkbase = args.linkbase or url_base(args.feedurl)
    # print ("linkbase", linkbase, args.linkbase, args.feedurl)

    for p in pads:
        # Index each pad's version records ("text", "diffhtml", ...) by type.
        versions_by_type = {}
        p["versions_by_type"] = versions_by_type
        for v in p["versions"]:
            t = v["type"]
            versions_by_type[t] = v
        # Inline the full pad text for the template (py2: bytes -> unicode).
        with open (versions_by_type["text"]["path"]) as f:
            p["text"] = f.read().decode("utf-8")

        # ADD IN LINK
        # Use the first requested version type whose file exists ("pad"
        # links are accepted without a file check).
        for v in linkversions:
            vdata = versions_by_type[v]
            try:
                if v == "pad" or os.path.exists(vdata["path"]):
                    p["link"] = absurl(vdata["url"], linkbase)
                    break
            except KeyError as e:
                # version record lacks "path"/"url": try the next type
                pass

    # Render and emit the feed as utf-8 bytes on stdout.
    print (template.render(vars(args)).encode("utf-8"))
|
Loading…
Reference in new issue