rss feed from files

This commit is contained in:
Michael Murtaugh 2016-01-14 17:57:41 +01:00
parent 62f95c6c78
commit 3ee4b8f77c
2 changed files with 66 additions and 16 deletions

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function from __future__ import print_function
from argparse import ArgumentParser from argparse import ArgumentParser
import sys, json, re, os import sys, json, re, os, urlparse
from datetime import datetime from datetime import datetime
from urllib import urlencode from urllib import urlencode
from urllib2 import HTTPError from urllib2 import HTTPError
@ -36,6 +36,24 @@ def group (items, key=lambda x: x):
def base (x):
    """Strip a known dump-file suffix from the end of x.

    Recognised suffixes are ".raw.html", ".diff.html", ".meta.json" and
    ".raw.txt". Names that do not end in one of them are returned unchanged.
    """
    # The alternatives are grouped so that "$" anchors every one of them;
    # without the group only the last alternative is tied to the end of the
    # string and the others would be removed from the middle of a name.
    return re.sub(r"(\.raw\.html|\.diff\.html|\.meta\.json|\.raw\.txt)$", "", x)
def excerpt (t, chars=25):
    """Shorten t for display (registered as a template filter in main).

    Text of at most `chars` characters is returned untouched; longer text is
    cut to its first `chars` characters and "..." is appended.
    """
    if len(t) <= chars:
        return t
    return t[:chars] + "..."
def absurl (url, base=None):
    """Return url made absolute by prefixing it with base.

    url  -- the link to resolve; anything starting with "http" is treated as
            already absolute and returned as is.
    base -- prefix to prepend by plain string concatenation (no slash is
            inserted). When it is None or empty, url is returned unchanged
            instead of failing on None + str.
    """
    if url.startswith("http") or not base:
        return url
    return base + url
def url_base (url):
    """Return the "folder" part of url, ending in a slash.

    The last path component (typically a file name) is dropped together with
    any params, query string and fragment. If nothing remains, e.g. for a
    bare "feed.xml", an empty string is returned with no slash added.

    Note: leading slashes of the path are stripped before splitting, so a
    scheme-less "/a/b.xml" yields "a/".
    """
    parts = urlparse.urlparse(url)
    # parts[2] is the path; keep only the directory half of the split.
    folder = os.path.split(parts[2].lstrip("/"))[0]
    # Rebuild from scheme (parts[0]) and netloc (parts[1]) plus the folder.
    rebuilt = urlparse.urlunparse((parts[0], parts[1], folder, None, None, None))
    if not rebuilt:
        return rebuilt
    return rebuilt + "/"
def main (args): def main (args):
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
@ -46,19 +64,24 @@ def main (args):
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument("--type", default="lastchanges", help="type of feed, default: lastchanges") p.add_argument("--type", default="recentchanges", help="type of feed, default: recentchanges")
p.add_argument("--limit", type=int, default=10, help="number of items, default: 10")
p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False (reverse chrono)")
p.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad") p.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad")
p.add_argument("--description", default="", help="channel description, default: empty") p.add_argument("--description", default="", help="channel description, default: empty")
p.add_argument("--language", default="en-US", help="feed language, default: en-US") p.add_argument("--language", default="en-US", help="feed language, default: en-US")
p.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily") p.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
p.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1") p.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1")
p.add_argument("--padurl", default=None, help="to use as channel link, default: padurl") p.add_argument("--siteurl", default=None, help="to use as channel's site link, default: the etherpad url")
p.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml") p.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml")
p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump") p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
p.add_argument("--itemlink", default="pad", help="item to link to in feed, possible values: pad, text, html, dhtml; default: pad")
p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False") p.add_argument("--content", default=False, action="store_true", help="include content, default: False")
p.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
args = p.parse_args(args) args = p.parse_args(args)
tmpath = args.templates tmpath = args.templates
@ -68,6 +91,7 @@ def main (args):
tmpath = os.path.join(tmpath, "data", "templates") tmpath = os.path.join(tmpath, "data", "templates")
env = Environment(loader=FileSystemLoader(tmpath)) env = Environment(loader=FileSystemLoader(tmpath))
env.filters["excerpt"] = excerpt
template = env.get_template("rss.xml") template = env.get_template("rss.xml")
info = loadpadinfo(args.padinfo) info = loadpadinfo(args.padinfo)
@ -97,10 +121,37 @@ def main (args):
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"]) padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
if type(padurlbase) == unicode: if type(padurlbase) == unicode:
padurlbase = padurlbase.encode("utf-8") padurlbase = padurlbase.encode("utf-8")
args.padurl = args.padurl or padurlbase args.siteurl = args.siteurl or padurlbase
args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000") args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
# from pprint import pprint # order items & apply limit
# pprint(inputs) args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=not args.chronological)
if args.limit:
args.pads = args.pads[:args.limit]
# add versions_by_type, add in full text
# add link (based on args.link)
linkversions = args.link.split(",")
linkbase = args.linkbase or url_base(args.feedurl)
# print ("linkbase", linkbase, args.linkbase, args.feedurl)
for p in pads:
versions_by_type = {}
p["versions_by_type"] = versions_by_type
for v in p["versions"]:
t = v["type"]
versions_by_type[t] = v
with open (versions_by_type["text"]["path"]) as f:
p["text"] = f.read().decode("utf-8")
# ADD IN LINK
for v in linkversions:
vdata = versions_by_type[v]
try:
if v == "pad" or os.path.exists(vdata["path"]):
p["link"] = absurl(vdata["url"], linkbase)
break
except KeyError as e:
pass
print (template.render(vars(args)).encode("utf-8")) print (template.render(vars(args)).encode("utf-8"))

View File

@ -10,7 +10,7 @@
<channel> <channel>
<title>{{title}}</title> <title>{{title}}</title>
<atom:link href="{{feedurl}}" rel="self" type="application/rss+xml" /> <atom:link href="{{feedurl}}" rel="self" type="application/rss+xml" />
<link>{{padurl}}</link> <link>{{siteurl}}</link>
<description>{{description}}</description> <description>{{description}}</description>
<lastBuildDate>{{utcnow}}</lastBuildDate> <lastBuildDate>{{utcnow}}</lastBuildDate>
<language>{{language}}</language> <language>{{language}}</language>
@ -21,14 +21,13 @@
{% for p in pads %} {% for p in pads %}
<item> <item>
<title>{{p.pad}}</title> <title>{{p.pad}}</title>
<link>{{p.versions[0].url}}</link> <link>{{p.link}}</link>
<pubDate>{{p.lastedited_822}}</pubDate> <pubDate>{{p.lastedited_822}}</pubDate>
<dc:creator><![CDATA[-]]></dc:creator> <guid isPermaLink="false">{{p.link}}</guid>
<category><![CDATA[-]]></category> <description><![CDATA[{{p.text|excerpt(100)}}]]></description>
<guid isPermaLink="false">{{p.versions[0].url}}</guid> {% if content %}<content:encoded><![CDATA[{{p.text}}]]></content:encoded>{% endif %}
<description><![CDATA[{{p.description}}]]></description>
<content:encoded><![CDATA[{{p.content}}]]></content:encoded>
</item> </item>
{% endfor %} {% endfor %}
</channel> </channel>
</rss>