index magic

This commit is contained in:
Michael Murtaugh 2018-02-22 15:40:27 +01:00
parent d8d37255a0
commit ca4276e93b
2 changed files with 129 additions and 19 deletions

View File

@ -1,7 +1,8 @@
from __future__ import print_function from __future__ import print_function
from argparse import ArgumentParser from argparse import ArgumentParser
import sys, json, re, os import sys, json, re, os, time
from datetime import datetime from datetime import datetime
import dateutil.parser
try: try:
# python2 # python2
@ -27,6 +28,7 @@ index:
""" """
def group (items, key=lambda x: x): def group (items, key=lambda x: x):
""" returns a list of lists, of items grouped by a key function """
ret = [] ret = []
keys = {} keys = {}
for item in items: for item in items:
@ -39,8 +41,19 @@ def group (items, key=lambda x: x):
ret.append(keys[k]) ret.append(keys[k])
return ret return ret
# def base (x):
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def splitextlong (x):
    """ split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz')

    Only the final path component is searched for the first dot, so dots in
    directory names (or a leading "./") are never taken for the start of the
    extension: "./my.dir/foo.bar.baz" => ('./my.dir/foo', '.bar.baz').

    Returns a (root, ext) tuple with root + ext == x; ext is '' when the
    final component contains no dot.
    """
    filename = os.path.split(x)[1]
    # Slice the directory prefix off x itself (not os.path.join) so the
    # caller's exact separators are preserved in the returned root.
    prefix = x[:len(x) - len(filename)]
    stem, dot, rest = filename.partition(".")
    if not dot:
        return x, ''
    return prefix + stem, dot + rest
def base (x):
    """ strip the "long" extension from x, i.e. foo.bar.baz => 'foo' """
    stem, _ext = splitextlong(x)
    return stem
def excerpt (t, chars=25): def excerpt (t, chars=25):
if len(t) > chars: if len(t) > chars:
@ -60,10 +73,18 @@ def url_base (url):
ret += "/" ret += "/"
return ret return ret
def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'):
    """ format a timestamp for display (registered as a template filter)

    t is either a date string, which is parsed with dateutil, or a number of
    seconds since the epoch, which is rendered in local time.
    format is a strftime pattern.
    """
    try:
        # python2: also accept unicode (e.g. values loaded from JSON)
        string_types = basestring
    except NameError:
        # python3
        string_types = str
    if isinstance(t, string_types):
        return dateutil.parser.parse(t).strftime(format)
    return time.strftime(format, time.localtime(t))
def main (args): def main (args):
p = ArgumentParser("Convert dumped files to a document via a template.") p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="filenames (uses .meta.json files)") p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in") p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html") p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: ./.etherdump/settings.json") p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: ./.etherdump/settings.json")
@ -103,19 +124,55 @@ def main (args):
env = Environment(loader=FileSystemLoader(tmpath)) env = Environment(loader=FileSystemLoader(tmpath))
env.filters["excerpt"] = excerpt env.filters["excerpt"] = excerpt
env.filters["datetimeformat"] = datetimeformat
template = env.get_template(args.template) template = env.get_template(args.template)
info = loadpadinfo(args.padinfo) info = loadpadinfo(args.padinfo)
inputs = args.input inputs = args.input
inputs.sort() inputs.sort()
inputs = group(inputs, base) # Use "base" to strip (longest) extensions
# inputs = group(inputs, base)
def wrappath (p):
    """ describe a local file as a pad "version" entry

    The entry points at "./" + p for both url and path, always reports
    code 200, and uses the last extension of p (without its dot) as type.
    """
    location = "./{0}".format(p)
    _root, dotted = os.path.splitext(p)
    return dict(url=location, path=location, code=200, type=dotted[1:])
def metaforpaths (paths):
    """ build a "dummy" pad record that wraps a set of files as versions

    paths must be a non-empty list of existing files. The pad id is the
    base name of the first path; the last-edited time is the newest
    modification time among all the paths.
    """
    pid = base(paths[0])
    versions = [wrappath(x) for x in paths]
    # Both lastedited fields must describe the same instant: the newest
    # mtime, not the mtime of whichever path happens to be listed last.
    lastedited = max(os.stat(p).st_mtime for p in paths)
    return {
        'pad': pid,
        'padid': pid,
        'versions': versions,
        'lastedited_iso': datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S"),
        # NOTE(review): seconds since the epoch, as returned by os.stat;
        # confirm this matches the unit used in real .meta.json files.
        'lastedited_raw': lastedited,
    }
def loadmeta(p):
    """ load the JSON metadata stored in p

    Only paths ending in ".meta.json" are read; any other path yields None.
    """
    if not p.endswith(".meta.json"):
        return None
    with open(p) as metafile:
        return json.load(metafile)
# # IF there is a .meta.json, load it & MERGE with other files
# if ret:
# # TODO: merge with other files
# for p in paths:
# if "./"+p not in ret['versions']:
# ret['versions'].append(wrappath(p))
# return ret
# else:
# return metaforpaths(paths)
def fixdates (padmeta): def fixdates (padmeta):
d = dateutil.parser.parse(padmeta["lastedited_iso"]) d = dateutil.parser.parse(padmeta["lastedited_iso"])
@ -124,9 +181,59 @@ def main (args):
return padmeta return padmeta
pads = map(loadmeta, inputs) pads = map(loadmeta, inputs)
pads = [x for x in pads if x != None]
pads = map(fixdates, pads) pads = map(fixdates, pads)
args.pads = list(pads) args.pads = list(pads)
inputs = args.input
inputs.sort()
removelist = []
def has_version (padinfo, path):
    """ return the versions of padinfo whose 'path' is "./" + path

    The result is a (possibly empty) list, so it can be used directly as a
    truth value. Versions without a 'path' key are ignored.
    """
    wanted = "./"+path
    matches = []
    for version in padinfo['versions']:
        if 'path' in version and version['path'] == wanted:
            matches.append(version)
    return matches
pads_by_base = {}
for p in args.pads:
# print ("Trying padid", p['padid'], file=sys.stderr)
padbase = os.path.splitext(p['padid'])[0]
pads_by_base[padbase] = p
padbases = list(pads_by_base.keys())
# SORT THEM LONGEST FIRST TO ensure that LONGEST MATCHES MATCH
padbases.sort(key=lambda x: len(x), reverse=True)
# print ("PADBASES", file=sys.stderr)
# for pb in padbases:
# print (" ", pb, file=sys.stderr)
def could_have_base (x, y):
    """ True when x is y itself, or y followed by a dot-led extension """
    if x == y:
        return True
    return x.startswith(y + ".")
def get_best_pad (x):
    """ return the first pad whose base could be the base of x, else None

    Candidates are tried in padbases order; the enclosing code sorts that
    list longest first, so the first hit is the longest matching base.
    """
    candidates = (pads_by_base[pb] for pb in padbases if could_have_base(x, pb))
    return next(candidates, None)
for x in inputs:
# pair input with a pad if possible
p = get_best_pad(x)
if p:
if not has_version(p, x):
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr)
p['versions'].append(wrappath(x))
# else:
# print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr)
removelist.append(x)
# Remove matched files
for x in removelist:
inputs.remove(x)
# print ("Remaining files:", file=sys.stderr)
# for x in inputs:
# print (x, file=sys.stderr)
# print (file=sys.stderr)
# Add "fake" pads for remaining files
for x in inputs:
args.pads.append(metaforpaths([x]))
if args.timestamp == None: if args.timestamp == None:
args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@ -165,18 +272,21 @@ def main (args):
for v in p["versions"]: for v in p["versions"]:
t = v["type"] t = v["type"]
versions_by_type[t] = v versions_by_type[t] = v
with open (versions_by_type["text"]["path"]) as f:
p["text"] = f.read()
# ADD IN LINK if "text" in versions_by_type:
with open (versions_by_type["text"]["path"]) as f:
p["text"] = f.read()
# ADD IN LINK TO PAD AS "link"
for v in linkversions: for v in linkversions:
vdata = versions_by_type[v] if v in versions_by_type:
try: vdata = versions_by_type[v]
if v == "pad" or os.path.exists(vdata["path"]): try:
p["link"] = absurl(vdata["url"], linkbase) if v == "pad" or os.path.exists(vdata["path"]):
break p["link"] = absurl(vdata["url"], linkbase)
except KeyError as e: break
pass except KeyError as e:
pass
if args.output: if args.output:
with open(args.output, "w") as f: with open(args.output, "w") as f:

View File

@ -92,7 +92,7 @@ $(document).ready(function()
<td class="versions"> <td class="versions">
{% for v in pad.versions %}<a href="{{v.url}}">{{v.type}}</a> {% endfor %} {% for v in pad.versions %}<a href="{{v.url}}">{{v.type}}</a> {% endfor %}
</td> </td>
<td class="lastedited">{{ pad.lastedited_iso|replace("T", " ") }}</td> <td class="lastedited">{{ pad.lastedited_iso|datetimeformat }}</td>
<td class="revisions">{{ pad.revisions }}</td> <td class="revisions">{{ pad.revisions }}</td>
<td class="authors">{{ pad.author_ids|length }}</td> <td class="authors">{{ pad.author_ids|length }}</td>
</tr> </tr>