diff --git a/etherdump/commands/rss.py b/etherdump/commands/rss.py
index 63f0f27..7ce86bf 100644
--- a/etherdump/commands/rss.py
+++ b/etherdump/commands/rss.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 from __future__ import print_function
 from argparse import ArgumentParser
-import sys, json, re, os
+import sys, json, re, os, urlparse
 from datetime import datetime
 from urllib import urlencode
 from urllib2 import HTTPError
@@ -36,6 +36,28 @@ def group (items, key=lambda x: x):
 def base (x):
     return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
 
+def excerpt (t, chars=25):
+    """ Template filter: cut t down to chars characters, appending an ellipsis when cut """
+    if len(t) > chars:
+        t = t[:chars] + "..."
+    return t
+
+def absurl (url, base=None):
+    """ Return url as is when it is absolute (http/https) or no base is given, else base + url """
+    if base and not url.startswith(("http://", "https://")):
+        return base + url
+    return url
+
+def url_base (url):
+    """ Return the directory part of url with a trailing slash, or an empty string when it has none """
+    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
+    # keep everything up to and including the last "/" so that a leading slash survives
+    path = path[:path.rfind("/") + 1]
+    ret = urlparse.urlunparse((scheme, netloc, path, "", "", ""))
+    if ret and not ret.endswith("/"):
+        ret += "/"
+    return ret
+
 def main (args):
     p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
 
@@ -46,19 +68,24 @@ def main (args):
     p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
     p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
 
-    p.add_argument("--type", default="lastchanges", help="type of feed, default: lastchanges")
+    p.add_argument("--type", default="recentchanges", help="type of feed, default: recentchanges")
+
+    p.add_argument("--limit", type=int, default=10, help="number of items, default: 10")
+    p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False (reverse chrono)")
 
     p.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad")
     p.add_argument("--description", default="", help="channel description, default: empty")
     p.add_argument("--language", default="en-US", help="feed language, default: en-US")
     p.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
     p.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1")
-    p.add_argument("--padurl", default=None, help="to use as channel link, default: padurl")
+    p.add_argument("--siteurl", default=None, help="to use as channel's site link, default: the etherpad url")
     p.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml")
     p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
 
-    p.add_argument("--itemlink", default="pad", help="item to link to in feed, possible values: pad, text, html, dhtml; default: pad")
-    p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False")
+    p.add_argument("--content", default=False, action="store_true", help="include content, default: False")
+    p.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
+    p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
+
     args = p.parse_args(args)
 
     tmpath = args.templates
@@ -68,6 +95,7 @@ def main (args):
         tmpath = os.path.join(tmpath, "data", "templates")
 
     env = Environment(loader=FileSystemLoader(tmpath))
+    env.filters["excerpt"] = excerpt
     template = env.get_template("rss.xml")
 
     info = loadpadinfo(args.padinfo)
@@ -97,10 +125,41 @@ def main (args):
     padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
     if type(padurlbase) == unicode:
         padurlbase = padurlbase.encode("utf-8")
-    args.padurl = args.padurl or padurlbase
+    args.siteurl = args.siteurl or padurlbase
     args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
 
-    # from pprint import pprint
-    # pprint(inputs)
+    # order items & apply limit
+    args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=not args.chronological)
+    if args.limit:
+        args.pads = args.pads[:args.limit]
+
+    # add versions_by_type, add in full text
+    # add link (based on args.link)
+    linkversions = [x.strip() for x in args.link.split(",")]
+    linkbase = args.linkbase or url_base(args.feedurl)
+
+    # only the pads that made it into the feed need text and link
+    for pad in args.pads:
+        versions_by_type = {}
+        pad["versions_by_type"] = versions_by_type
+        for v in pad["versions"]:
+            versions_by_type[v["type"]] = v
+
+        # a pad may have no text version; the template then sees an empty text
+        pad["text"] = u""
+        if "text" in versions_by_type:
+            with open(versions_by_type["text"]["path"]) as f:
+                pad["text"] = f.read().decode("utf-8")
+
+        # ADD IN LINK: the first requested version that is actually available
+        for linktype in linkversions:
+            try:
+                vdata = versions_by_type[linktype]
+                if linktype == "pad" or os.path.exists(vdata["path"]):
+                    pad["link"] = absurl(vdata["url"], linkbase)
+                    break
+            except KeyError:
+                # version type (or its path/url) missing for this pad: try the next one
+                pass
 
     print (template.render(vars(args)).encode("utf-8"))
diff --git a/etherdump/data/templates/rss.xml b/etherdump/data/templates/rss.xml
index 0592c1c..8c7e829 100644
--- a/etherdump/data/templates/rss.xml
+++ b/etherdump/data/templates/rss.xml
@@ -10,7 +10,7 @@
 {{title}}
-{{padurl}}
+{{siteurl}}
 {{description}}
 {{utcnow}}
 {{language}}
@@ -21,14 +21,13 @@
 {% for p in pads %}
 {{p.pad}}
-{{p.versions[0].url}}
+{{p.link}}
 {{p.lastedited_822}}
-
-
-{{p.versions[0].url}}
-
-
+{{p.link}}
+
+{% if content %}{% endif %}
 {% endfor %}
-
\ No newline at end of file
+
+