diff --git a/etherdump/commands/index.py b/etherdump/commands/index.py
index f2490e4..78e7ad3 100644
--- a/etherdump/commands/index.py
+++ b/etherdump/commands/index.py
@@ -1,13 +1,22 @@
#!/usr/bin/env python
-
from __future__ import print_function
from argparse import ArgumentParser
-import json, os, re
+import sys, json, re, os, urlparse
+from datetime import datetime
from urllib import urlencode
-from urllib2 import urlopen, HTTPError, URLError
+from urllib2 import HTTPError
from jinja2 import FileSystemLoader, Environment
-from datetime import datetime
+from common import *
+from time import sleep
+import dateutil.parser
+"""
+index:
+ Generate pages from etherdumps using a template.
+
+ Built-in templates: rss.xml, index.html
+
+"""
def group (items, key=lambda x: x):
ret = []
@@ -22,23 +31,68 @@ def group (items, key=lambda x: x):
ret.append(keys[k])
return ret
-def main(args):
- p = ArgumentParser("")
+def base (x):
+ return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
+
+def excerpt (t, chars=25):
+ if len(t) > chars:
+ t = t[:chars] + "..."
+ return t
+
+def absurl (url, base=None):
+ if not url.startswith("http"):
+ return base + url
+ return url
+
+def url_base (url):
+ (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
+ path, _ = os.path.split(path.lstrip("/"))
+ ret = urlparse.urlunparse((scheme, netloc, path, None, None, None))
+ if ret:
+ ret += "/"
+ return ret
+
+def main (args):
+ p = ArgumentParser("Convert dumped files to a document via a template.")
+
p.add_argument("input", nargs="+", help="filenames")
- p.add_argument("--templates", default=None, help="templates path")
+ p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
+ p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
+ p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
+ # p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
+
+ p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
+ p.add_argument("--order", default="lastedited", help="order, possible values: padid, pad (no group name), lastedited (number of) authors, revisions, default: lastedited")
+ p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
+ p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
+
+ p.add_argument("--title", default="etherdump", help="title for document or rss feed channel title, default: etherdump")
+ p.add_argument("--description", default="", help="channel description, default: empty")
+ p.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
+ p.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
+ p.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
+ p.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
+ p.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
+ p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
+
+ p.add_argument("--content", default=False, action="store_true", help="rss: include content, default: False")
+ p.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
+ p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
+
args = p.parse_args(args)
- tmpath = args.templates
+ tmpath = args.templatepath
+ # Default path for template is the built-in data/templates
if tmpath == None:
tmpath = os.path.split(os.path.abspath(__file__))[0]
tmpath = os.path.split(tmpath)[0]
tmpath = os.path.join(tmpath, "data", "templates")
env = Environment(loader=FileSystemLoader(tmpath))
- template = env.get_template("index.html")
+ env.filters["excerpt"] = excerpt
+ template = env.get_template(args.template)
- def base (x):
- return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
+ info = loadpadinfo(args.padinfo)
inputs = args.input
inputs.sort()
@@ -50,23 +104,64 @@ def main(args):
with open(p) as f:
return json.load(f)
- inputs = map(loadmeta, inputs)
- # sort by last edited (reverse)
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- print (template.render({"timestamp": timestamp, "pads": inputs}).encode("utf-8"))
-
- # TODO: MODIFY THIS TO MAKE THE OUTPUT JOINABLE with the collected META DATA
- # evt: how can the metadata become a GRAPH structure!!! with each output DOCUMENT
- #
-    # print ("<ul>")
- # for x in inputs:
- # padid = x
- # metapath = os.path.join(x, "{0}.meta.json".format(padid))
- # if os.path.exists(metapath):
-    #         print ("""<li>{0}</li>""".format(x))
- # with open(metapath) as f:
- # meta = json.load(f)
- # indexpath = os.path.join(x, "index.html")
- # with open(indexpath, "w") as f:
-
-    # print ("</ul>")
+ def fixdates (padmeta):
+ d = dateutil.parser.parse(padmeta["lastedited_iso"])
+ padmeta["lastedited"] = d
+ padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
+ return padmeta
+
+ pads = map(loadmeta, inputs)
+ pads = map(fixdates, pads)
+ args.pads = pads
+
+ # args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+ padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
+ if type(padurlbase) == unicode:
+ padurlbase = padurlbase.encode("utf-8")
+ args.siteurl = args.siteurl or padurlbase
+ args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
+
+ # order items & apply limit
+ if args.order == "lastedited":
+ args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
+ elif args.order == "pad":
+ args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
+ elif args.order == "padid":
+ args.pads.sort(key=lambda x: x.get("padid"), reverse=args.reverse)
+ elif args.order == "revisions":
+ args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
+ elif args.order == "authors":
+ args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
+ else:
+ raise Exception("That ordering is not implemented!")
+
+ if args.limit:
+ args.pads = args.pads[:args.limit]
+
+ # add versions_by_type, add in full text
+ # add link (based on args.link)
+ linkversions = args.link.split(",")
+ linkbase = args.linkbase or url_base(args.feedurl)
+ # print ("linkbase", linkbase, args.linkbase, args.feedurl)
+
+ for p in pads:
+ versions_by_type = {}
+ p["versions_by_type"] = versions_by_type
+ for v in p["versions"]:
+ t = v["type"]
+ versions_by_type[t] = v
+ with open (versions_by_type["text"]["path"]) as f:
+ p["text"] = f.read().decode("utf-8")
+
+ # ADD IN LINK
+ for v in linkversions:
+ vdata = versions_by_type[v]
+ try:
+ if v == "pad" or os.path.exists(vdata["path"]):
+ p["link"] = absurl(vdata["url"], linkbase)
+ break
+ except KeyError as e:
+ pass
+
+ print (template.render(vars(args)).encode("utf-8"))
diff --git a/etherdump/commands/rss.py b/etherdump/commands/rss.py
deleted file mode 100644
index 7ce86bf..0000000
--- a/etherdump/commands/rss.py
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/usr/bin/env python
-from __future__ import print_function
-from argparse import ArgumentParser
-import sys, json, re, os, urlparse
-from datetime import datetime
-from urllib import urlencode
-from urllib2 import HTTPError
-from jinja2 import FileSystemLoader, Environment
-from common import *
-from time import sleep
-import dateutil.parser
-
-"""
-rss:
- Generate an RSS feed from an etherdump.
-
-
-TODO NEXT
-add back limit and ordering parameters to create filters to make a latest changes feed!
-
-"""
-
-def group (items, key=lambda x: x):
- ret = []
- keys = {}
- for item in items:
- k = key(item)
- if k not in keys:
- keys[k] = []
- keys[k].append(item)
- for k in sorted(keys):
- keys[k].sort()
- ret.append(keys[k])
- return ret
-
-def base (x):
- return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
-
-def excerpt (t, chars=25):
- if len(t) > chars:
- t = t[:chars] + "..."
- return t
-
-def absurl (url, base=None):
- if not url.startswith("http"):
- return base + url
- return url
-
-def url_base (url):
- (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
- path, _ = os.path.split(path.lstrip("/"))
- ret = urlparse.urlunparse((scheme, netloc, path, None, None, None))
- if ret:
- ret += "/"
- return ret
-
-def main (args):
- p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
-
- p.add_argument("input", nargs="+", help="filenames")
- p.add_argument("--templates", default=None, help="templates path")
-
- p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
- p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
-
- p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
- p.add_argument("--type", default="recentchanges", help="type of feed, default: recentchanges")
-
- p.add_argument("--limit", type=int, default=10, help="number of items, default: 10")
- p.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False (reverse chrono)")
-
- p.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad")
- p.add_argument("--description", default="", help="channel description, default: empty")
- p.add_argument("--language", default="en-US", help="feed language, default: en-US")
- p.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
- p.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1")
- p.add_argument("--siteurl", default=None, help="to use as channel's site link, default: the etherpad url")
- p.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml")
- p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
-
- p.add_argument("--content", default=False, action="store_true", help="include content, default: False")
- p.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
- p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
-
- args = p.parse_args(args)
-
- tmpath = args.templates
- if tmpath == None:
- tmpath = os.path.split(os.path.abspath(__file__))[0]
- tmpath = os.path.split(tmpath)[0]
- tmpath = os.path.join(tmpath, "data", "templates")
-
- env = Environment(loader=FileSystemLoader(tmpath))
- env.filters["excerpt"] = excerpt
- template = env.get_template("rss.xml")
-
- info = loadpadinfo(args.padinfo)
-
- inputs = args.input
- inputs.sort()
- inputs = group(inputs, base)
-
- def loadmeta(paths):
- for p in paths:
- if p.endswith(".meta.json"):
- with open(p) as f:
- return json.load(f)
-
- def fixdates (padmeta):
- d = dateutil.parser.parse(padmeta["lastedited_iso"])
- padmeta["lastedited"] = d
- padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
- return padmeta
-
- pads = map(loadmeta, inputs)
- pads = map(fixdates, pads)
- args.pads = pads
-
- # args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
- padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
- if type(padurlbase) == unicode:
- padurlbase = padurlbase.encode("utf-8")
- args.siteurl = args.siteurl or padurlbase
- args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
-
- # order items & apply limit
- args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=not args.chronological)
- if args.limit:
- args.pads = args.pads[:args.limit]
-
- # add versions_by_type, add in full text
- # add link (based on args.link)
- linkversions = args.link.split(",")
- linkbase = args.linkbase or url_base(args.feedurl)
- # print ("linkbase", linkbase, args.linkbase, args.feedurl)
-
- for p in pads:
- versions_by_type = {}
- p["versions_by_type"] = versions_by_type
- for v in p["versions"]:
- t = v["type"]
- versions_by_type[t] = v
- with open (versions_by_type["text"]["path"]) as f:
- p["text"] = f.read().decode("utf-8")
-
- # ADD IN LINK
- for v in linkversions:
- vdata = versions_by_type[v]
- try:
- if v == "pad" or os.path.exists(vdata["path"]):
- p["link"] = absurl(vdata["url"], linkbase)
- break
- except KeyError as e:
- pass
-
- print (template.render(vars(args)).encode("utf-8"))