diff --git a/etherdump/commands/rss.py b/etherdump/commands/rss.py new file mode 100644 index 0000000..1b53f55 --- /dev/null +++ b/etherdump/commands/rss.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +from __future__ import print_function +from argparse import ArgumentParser +import sys, json, re, os +from datetime import datetime +from urllib import urlencode +from urllib2 import HTTPError +from jinja2 import FileSystemLoader, Environment +from common import * +from time import sleep +import dateutil.parser + +""" +rss: + Generate an RSS feed from an etherdump. +""" + +def group (items, key=lambda x: x): + ret = [] + keys = {} + for item in items: + k = key(item) + if k not in keys: + keys[k] = [] + keys[k].append(item) + for k in sorted(keys): + keys[k].sort() + ret.append(keys[k]) + return ret + +def base (x): + return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) + +def main (args): + p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") + + p.add_argument("input", nargs="+", help="filenames") + p.add_argument("--templates", default=None, help="templates path") + + p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") + + p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") + p.add_argument("--type", default="lastchanges", help="type of feed, default: lastchanges") + + p.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad") + p.add_argument("--description", default="", help="channel description, default: empty") + p.add_argument("--language", default="en-US", help="feed language, default: en-US") + p.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily") + p.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1") + p.add_argument("--padurl", default=None, help="to use as channel link, default: padurl") + p.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml") + p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump") + p.add_argument("--itemlink", default="pad", help="item to link to in feed, possible values: pad, text, html, dhtml; default: pad") + + p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False") + args = p.parse_args(args) + + tmpath = args.templates + if tmpath == None: + tmpath = os.path.split(os.path.abspath(__file__))[0] + tmpath = os.path.split(tmpath)[0] + tmpath = os.path.join(tmpath, "data", "templates") + + env = Environment(loader=FileSystemLoader(tmpath)) + template = env.get_template("rss.xml") + + info = loadpadinfo(args.padinfo) + + inputs = args.input + inputs.sort() + inputs = group(inputs, base) + + def loadmeta(paths): + for p in paths: + if p.endswith(".meta.json"): + with open(p) as f: + return json.load(f) + + def fixdates (padmeta): + d = dateutil.parser.parse(padmeta["lastedited_iso"]) + padmeta["lastedited"] = d + padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000") + return padmeta + + pads = map(loadmeta, inputs) + pads = map(fixdates, pads) + args.pads = pads + + # args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"]) + if type(padurlbase) == unicode: + padurlbase = padurlbase.encode("utf-8") + args.padurl = args.padurl or padurlbase + args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000") + + # from pprint import pprint + # pprint(inputs) + + print (template.render(vars(args)).encode("utf-8")) diff --git a/etherdump/data/templates/rss.xml b/etherdump/data/templates/rss.xml new file mode 100644 index 0000000..0592c1c --- /dev/null +++ b/etherdump/data/templates/rss.xml @@ -0,0 +1,34 @@ + + + + {{title}} + + {{padurl}} + {{description}} + {{utcnow}} + {{language}} + {{updatePeriod}} + {{updateFrequency}} + {{generator}} + +{% for p in pads %} + + {{p.pad}} + {{p.versions[0].url}} + {{p.lastedited_822}} + + + {{p.versions[0].url}} + + + +{% endfor %} + + \ No newline at end of file